Diffstat (limited to 'drivers/infiniband/core')
30 files changed, 6074 insertions, 1425 deletions
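One highlight of the addr.c changes below is the new exported helper rdma_addr_size(), which replaces the old ip_addr_size() so that AF_IB addresses are sized correctly alongside AF_INET and AF_INET6. The following is a minimal userspace sketch of that same family-based dispatch, not the kernel implementation itself; the AF_IB value, the stand-in struct, and the _sketch function name are assumptions made purely for illustration (struct sockaddr_ib lives in the kernel RDMA headers).

	/* Hypothetical userspace mirror of the rdma_addr_size() dispatch added
	 * in addr.c: return the sockaddr size for a given address family.
	 * AF_IB and struct sockaddr_ib are RDMA/kernel specific; a stub is
	 * used here only so the sketch compiles on its own. */
	#include <stdio.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	#ifndef AF_IB
	#define AF_IB 27                /* assumption: kernel value for AF_IB */
	#endif

	struct sockaddr_ib_stub {       /* stand-in for the kernel's sockaddr_ib */
		unsigned short sib_family;
		unsigned char  sib_raw[46];
	};

	static size_t rdma_addr_size_sketch(const struct sockaddr *addr)
	{
		switch (addr->sa_family) {
		case AF_INET:
			return sizeof(struct sockaddr_in);
		case AF_INET6:
			return sizeof(struct sockaddr_in6);
		case AF_IB:
			return sizeof(struct sockaddr_ib_stub);
		default:
			return 0;       /* unknown family: caller treats 0 as an error */
		}
	}

	int main(void)
	{
		struct sockaddr_in v4 = { .sin_family = AF_INET };

		printf("AF_INET sockaddr size: %zu\n",
		       rdma_addr_size_sketch((struct sockaddr *)&v4));
		return 0;
	}

In the patch itself the callers in rdma_resolve_ip() simply switch from ip_addr_size() to rdma_addr_size() when copying source and destination addresses, which is what lets the CMA paths added later in cma.c handle AF_IB endpoints.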
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index cb1ab3ea499..ffd0af6734a 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,14 +1,15 @@ -infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o +infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_cm.o  user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_ucm.o  obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \ -					ib_cm.o iw_cm.o $(infiniband-y) +					ib_cm.o iw_cm.o ib_addr.o \ +					$(infiniband-y)  obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o  obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \  					$(user_access-y)  ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \ -				device.o fmr_pool.o cache.o +				device.o fmr_pool.o cache.o netlink.o  ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o  ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o @@ -17,7 +18,7 @@ ib_sa-y :=			sa_query.o multicast.o  ib_cm-y :=			cm.o -iw_cm-y :=			iwcm.o +iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o  rdma_cm-y :=			cma.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 8aba0ba57de..8172d37f9ad 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,6 +37,7 @@  #include <linux/inetdevice.h>  #include <linux/slab.h>  #include <linux/workqueue.h> +#include <linux/module.h>  #include <net/arp.h>  #include <net/neighbour.h>  #include <net/route.h> @@ -44,6 +45,7 @@  #include <net/addrconf.h>  #include <net/ip6_route.h>  #include <rdma/ib_addr.h> +#include <rdma/ib.h>  MODULE_AUTHOR("Sean Hefty");  MODULE_DESCRIPTION("IB Address Translation"); @@ -69,6 +71,23 @@ static LIST_HEAD(req_list);  static DECLARE_DELAYED_WORK(work, process_req);  static struct workqueue_struct *addr_wq; +int rdma_addr_size(struct sockaddr *addr) +{ +	switch (addr->sa_family) { +	case AF_INET: +		return sizeof(struct sockaddr_in); +	case AF_INET6: +		return sizeof(struct sockaddr_in6); +	case AF_IB: +		return sizeof(struct sockaddr_ib); +	default: +		return 0; +	} +} +EXPORT_SYMBOL(rdma_addr_size); + +static struct rdma_addr_client self; +  void rdma_addr_register_client(struct rdma_addr_client *client)  {  	atomic_set(&client->refcount, 1); @@ -102,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,  }  EXPORT_SYMBOL(rdma_copy_addr); -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, +		      u16 *vlan_id)  {  	struct net_device *dev;  	int ret = -EADDRNOTAVAIL; @@ -125,10 +145,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)  			return ret;  		ret = rdma_copy_addr(dev_addr, dev, NULL); +		if (vlan_id) +			*vlan_id = rdma_vlan_dev_vlan_id(dev);  		dev_put(dev);  		break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	case AF_INET6:  		rcu_read_lock();  		for_each_netdev_rcu(&init_net, dev) { @@ -136,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)  					  &((struct sockaddr_in6 *) addr)->sin6_addr,  					  dev, 1)) {  				ret = rdma_copy_addr(dev_addr, dev, NULL); +				if (vlan_id) +					*vlan_id = rdma_vlan_dev_vlan_id(dev);  				break;  			}  		} @@ -151,13 +175,11 @@ static void set_timeout(unsigned long time)  {  	unsigned long delay; -	cancel_delayed_work(&work); -  	delay = time - jiffies;  	if ((long)delay <= 0)  		delay = 1; -	queue_delayed_work(addr_wq, &work, 
delay); +	mod_delayed_work(addr_wq, &work, delay);  }  static void queue_req(struct addr_req *req) @@ -177,31 +199,53 @@ static void queue_req(struct addr_req *req)  	mutex_unlock(&lock);  } +static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr) +{ +	struct neighbour *n; +	int ret; + +	n = dst_neigh_lookup(dst, daddr); + +	rcu_read_lock(); +	if (!n || !(n->nud_state & NUD_VALID)) { +		if (n) +			neigh_event_send(n, NULL); +		ret = -ENODATA; +	} else { +		ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); +	} +	rcu_read_unlock(); + +	if (n) +		neigh_release(n); + +	return ret; +} +  static int addr4_resolve(struct sockaddr_in *src_in,  			 struct sockaddr_in *dst_in,  			 struct rdma_dev_addr *addr)  {  	__be32 src_ip = src_in->sin_addr.s_addr;  	__be32 dst_ip = dst_in->sin_addr.s_addr; -	struct flowi fl;  	struct rtable *rt; -	struct neighbour *neigh; +	struct flowi4 fl4;  	int ret; -	memset(&fl, 0, sizeof fl); -	fl.nl_u.ip4_u.daddr = dst_ip; -	fl.nl_u.ip4_u.saddr = src_ip; -	fl.oif = addr->bound_dev_if; - -	ret = ip_route_output_key(&init_net, &rt, &fl); -	if (ret) +	memset(&fl4, 0, sizeof(fl4)); +	fl4.daddr = dst_ip; +	fl4.saddr = src_ip; +	fl4.flowi4_oif = addr->bound_dev_if; +	rt = ip_route_output_key(&init_net, &fl4); +	if (IS_ERR(rt)) { +		ret = PTR_ERR(rt);  		goto out; - +	}  	src_in->sin_family = AF_INET; -	src_in->sin_addr.s_addr = rt->rt_src; +	src_in->sin_addr.s_addr = fl4.saddr;  	if (rt->dst.dev->flags & IFF_LOOPBACK) { -		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); +		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);  		if (!ret)  			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);  		goto put; @@ -209,59 +253,47 @@ static int addr4_resolve(struct sockaddr_in *src_in,  	/* If the device does ARP internally, return 'done' */  	if (rt->dst.dev->flags & IFF_NOARP) { -		rdma_copy_addr(addr, rt->dst.dev, NULL); -		goto put; -	} - -	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev); -	if (!neigh || !(neigh->nud_state & NUD_VALID)) { -		neigh_event_send(rt->dst.neighbour, NULL); -		ret = -ENODATA; -		if (neigh) -			goto release; +		ret = rdma_copy_addr(addr, rt->dst.dev, NULL);  		goto put;  	} -	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); -release: -	neigh_release(neigh); +	ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);  put:  	ip_rt_put(rt);  out:  	return ret;  } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  static int addr6_resolve(struct sockaddr_in6 *src_in,  			 struct sockaddr_in6 *dst_in,  			 struct rdma_dev_addr *addr)  { -	struct flowi fl; -	struct neighbour *neigh; +	struct flowi6 fl6;  	struct dst_entry *dst;  	int ret; -	memset(&fl, 0, sizeof fl); -	ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr); -	ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr); -	fl.oif = addr->bound_dev_if; +	memset(&fl6, 0, sizeof fl6); +	fl6.daddr = dst_in->sin6_addr; +	fl6.saddr = src_in->sin6_addr; +	fl6.flowi6_oif = addr->bound_dev_if; -	dst = ip6_route_output(&init_net, NULL, &fl); +	dst = ip6_route_output(&init_net, NULL, &fl6);  	if ((ret = dst->error))  		goto put; -	if (ipv6_addr_any(&fl.fl6_src)) { +	if (ipv6_addr_any(&fl6.saddr)) {  		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, -					 &fl.fl6_dst, 0, &fl.fl6_src); +					 &fl6.daddr, 0, &fl6.saddr);  		if (ret)  			goto put;  		src_in->sin6_family = AF_INET6; -		ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src); +		src_in->sin6_addr = fl6.saddr;  	}  	if (dst->dev->flags & IFF_LOOPBACK) 
{ -		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); +		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);  		if (!ret)  			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);  		goto put; @@ -273,14 +305,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,  		goto put;  	} -	neigh = dst->neighbour; -	if (!neigh || !(neigh->nud_state & NUD_VALID)) { -		neigh_event_send(dst->neighbour, NULL); -		ret = -ENODATA; -		goto put; -	} - -	ret = rdma_copy_addr(addr, dst->dev, neigh->ha); +	ret = dst_fetch_ha(dst, addr, &fl6.daddr);  put:  	dst_release(dst);  	return ret; @@ -367,12 +392,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client,  			goto err;  		} -		memcpy(src_in, src_addr, ip_addr_size(src_addr)); +		memcpy(src_in, src_addr, rdma_addr_size(src_addr));  	} else {  		src_in->sa_family = dst_addr->sa_family;  	} -	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); +	memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));  	req->addr = addr;  	req->callback = callback;  	req->context = context; @@ -419,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)  }  EXPORT_SYMBOL(rdma_addr_cancel); +struct resolve_cb_context { +	struct rdma_dev_addr *addr; +	struct completion comp; +}; + +static void resolve_cb(int status, struct sockaddr *src_addr, +	     struct rdma_dev_addr *addr, void *context) +{ +	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct +				rdma_dev_addr)); +	complete(&((struct resolve_cb_context *)context)->comp); +} + +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, +			       u16 *vlan_id) +{ +	int ret = 0; +	struct rdma_dev_addr dev_addr; +	struct resolve_cb_context ctx; +	struct net_device *dev; + +	union { +		struct sockaddr     _sockaddr; +		struct sockaddr_in  _sockaddr_in; +		struct sockaddr_in6 _sockaddr_in6; +	} sgid_addr, dgid_addr; + + +	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); +	if (ret) +		return ret; + +	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); +	if (ret) +		return ret; + +	memset(&dev_addr, 0, sizeof(dev_addr)); + +	ctx.addr = &dev_addr; +	init_completion(&ctx.comp); +	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, +			&dev_addr, 1000, resolve_cb, &ctx); +	if (ret) +		return ret; + +	wait_for_completion(&ctx.comp); + +	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); +	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); +	if (!dev) +		return -ENODEV; +	if (vlan_id) +		*vlan_id = rdma_vlan_dev_vlan_id(dev); +	dev_put(dev); +	return ret; +} +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); + +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) +{ +	int ret = 0; +	struct rdma_dev_addr dev_addr; +	union { +		struct sockaddr     _sockaddr; +		struct sockaddr_in  _sockaddr_in; +		struct sockaddr_in6 _sockaddr_in6; +	} gid_addr; + +	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + +	if (ret) +		return ret; +	memset(&dev_addr, 0, sizeof(dev_addr)); +	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); +	if (ret) +		return ret; + +	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); +	return ret; +} +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); +  static int netevent_callback(struct notifier_block *self, unsigned long event,  	void *ctx)  { @@ -443,11 +550,13 @@ static int __init addr_init(void)  		return -ENOMEM;  	register_netevent_notifier(&nb); +	rdma_addr_register_client(&self);  	return 0;  }  static void __exit addr_cleanup(void)  { +	rdma_addr_unregister_client(&self);  	
unregister_netevent_notifier(&nb);  	destroy_workqueue(addr_wq);  } diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 91916a8d5de..2bc7f5af64f 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -101,7 +101,8 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,  	agent = port_priv->agent[qpn];  	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);  	if (IS_ERR(ah)) { -		printk(KERN_ERR SPFX "ib_create_ah_from_wc error\n"); +		printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n", +			PTR_ERR(ah));  		return;  	} diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 68883565b72..80f6cf2449f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -167,6 +167,7 @@ int ib_find_cached_pkey(struct ib_device *device,  	unsigned long flags;  	int i;  	int ret = -ENOENT; +	int partial_ix = -1;  	if (port_num < start_port(device) || port_num > end_port(device))  		return -EINVAL; @@ -179,6 +180,46 @@ int ib_find_cached_pkey(struct ib_device *device,  	for (i = 0; i < cache->table_len; ++i)  		if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { +			if (cache->table[i] & 0x8000) { +				*index = i; +				ret = 0; +				break; +			} else +				partial_ix = i; +		} + +	if (ret && partial_ix >= 0) { +		*index = partial_ix; +		ret = 0; +	} + +	read_unlock_irqrestore(&device->cache.lock, flags); + +	return ret; +} +EXPORT_SYMBOL(ib_find_cached_pkey); + +int ib_find_exact_cached_pkey(struct ib_device *device, +			      u8                port_num, +			      u16               pkey, +			      u16              *index) +{ +	struct ib_pkey_cache *cache; +	unsigned long flags; +	int i; +	int ret = -ENOENT; + +	if (port_num < start_port(device) || port_num > end_port(device)) +		return -EINVAL; + +	read_lock_irqsave(&device->cache.lock, flags); + +	cache = device->cache.pkey_cache[port_num - start_port(device)]; + +	*index = -1; + +	for (i = 0; i < cache->table_len; ++i) +		if (cache->table[i] == pkey) {  			*index = i;  			ret = 0;  			break; @@ -188,7 +229,7 @@ int ib_find_cached_pkey(struct ib_device *device,  	return ret;  } -EXPORT_SYMBOL(ib_find_cached_pkey); +EXPORT_SYMBOL(ib_find_exact_cached_pkey);  int ib_get_cached_lmc(struct ib_device *device,  		      u8                port_num, @@ -302,13 +343,14 @@ static void ib_cache_event(struct ib_event_handler *handler,  	    event->event == IB_EVENT_LID_CHANGE  ||  	    event->event == IB_EVENT_PKEY_CHANGE ||  	    event->event == IB_EVENT_SM_CHANGE   || -	    event->event == IB_EVENT_CLIENT_REREGISTER) { +	    event->event == IB_EVENT_CLIENT_REREGISTER || +	    event->event == IB_EVENT_GID_CHANGE) {  		work = kmalloc(sizeof *work, GFP_ATOMIC);  		if (work) {  			INIT_WORK(&work->work, ib_cache_task);  			work->device   = event->device;  			work->port_num = event->element.port_num; -			schedule_work(&work->work); +			queue_work(ib_wq, &work->work);  		}  	}  } @@ -368,7 +410,7 @@ static void ib_cache_cleanup_one(struct ib_device *device)  	int p;  	ib_unregister_event_handler(&device->cache.event_handler); -	flush_scheduled_work(); +	flush_workqueue(ib_wq);  	for (p = 0; p <= end_port(device) - start_port(device); ++p) {  		kfree(device->cache.pkey_cache[p]); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 64e0903091a..c3239170d8b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -36,6 +36,7 @@  #include <linux/completion.h>  #include 
<linux/dma-mapping.h>  #include <linux/device.h> +#include <linux/module.h>  #include <linux/err.h>  #include <linux/idr.h>  #include <linux/interrupt.h> @@ -46,6 +47,7 @@  #include <linux/sysfs.h>  #include <linux/workqueue.h>  #include <linux/kdev_t.h> +#include <linux/etherdevice.h>  #include <rdma/ib_cache.h>  #include <rdma/ib_cm.h> @@ -176,6 +178,8 @@ struct cm_av {  	struct ib_ah_attr ah_attr;  	u16 pkey_index;  	u8 timeout; +	u8  valid; +	u8  smac[ETH_ALEN];  };  struct cm_work { @@ -375,26 +379,27 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)  	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,  			     &av->ah_attr);  	av->timeout = path->packet_life_time + 1; +	memcpy(av->smac, path->smac, sizeof(av->smac)); + +	av->valid = 1;  	return 0;  }  static int cm_alloc_id(struct cm_id_private *cm_id_priv)  {  	unsigned long flags; -	int ret, id; -	static int next_id; +	int id; -	do { -		spin_lock_irqsave(&cm.lock, flags); -		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, -					next_id, &id); -		if (!ret) -			next_id = ((unsigned) id + 1) & MAX_ID_MASK; -		spin_unlock_irqrestore(&cm.lock, flags); -	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); +	idr_preload(GFP_KERNEL); +	spin_lock_irqsave(&cm.lock, flags); + +	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); + +	spin_unlock_irqrestore(&cm.lock, flags); +	idr_preload_end();  	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; -	return ret; +	return id < 0 ? id : 0;  }  static void cm_free_id(__be32 local_id) @@ -889,6 +894,8 @@ retest:  		break;  	case IB_CM_ESTABLISHED:  		spin_unlock_irq(&cm_id_priv->lock); +		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) +			break;  		ib_send_cm_dreq(cm_id, NULL, 0);  		goto retest;  	case IB_CM_DREQ_SENT: @@ -1008,7 +1015,6 @@ static void cm_format_req(struct cm_req_msg *req_msg,  	req_msg->service_id = param->service_id;  	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;  	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); -	cm_req_set_resp_res(req_msg, param->responder_resources);  	cm_req_set_init_depth(req_msg, param->initiator_depth);  	cm_req_set_remote_resp_timeout(req_msg,  				       param->remote_cm_response_timeout); @@ -1017,12 +1023,16 @@ static void cm_format_req(struct cm_req_msg *req_msg,  	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));  	cm_req_set_local_resp_timeout(req_msg,  				      param->local_cm_response_timeout); -	cm_req_set_retry_count(req_msg, param->retry_count);  	req_msg->pkey = param->primary_path->pkey;  	cm_req_set_path_mtu(req_msg, param->primary_path->mtu); -	cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);  	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); -	cm_req_set_srq(req_msg, param->srq); + +	if (param->qp_type != IB_QPT_XRC_INI) { +		cm_req_set_resp_res(req_msg, param->responder_resources); +		cm_req_set_retry_count(req_msg, param->retry_count); +		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count); +		cm_req_set_srq(req_msg, param->srq); +	}  	if (pri_path->hop_limit <= 1) {  		req_msg->primary_local_lid = pri_path->slid; @@ -1080,7 +1090,8 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)  	if (!param->primary_path)  		return -EINVAL; -	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC) +	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && +	    param->qp_type != IB_QPT_XRC_INI)  		return -EINVAL;  	if 
(param->private_data && @@ -1549,6 +1560,9 @@ static int cm_req_handler(struct cm_work *work)  	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);  	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + +	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); +	work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;  	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);  	if (ret) {  		ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -1601,18 +1615,24 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,  	cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);  	rep_msg->local_comm_id = cm_id_priv->id.local_id;  	rep_msg->remote_comm_id = cm_id_priv->id.remote_id; -	cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));  	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));  	rep_msg->resp_resources = param->responder_resources; -	rep_msg->initiator_depth = param->initiator_depth;  	cm_rep_set_target_ack_delay(rep_msg,  				    cm_id_priv->av.port->cm_dev->ack_delay);  	cm_rep_set_failover(rep_msg, param->failover_accepted); -	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);  	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); -	cm_rep_set_srq(rep_msg, param->srq);  	rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; +	if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) { +		rep_msg->initiator_depth = param->initiator_depth; +		cm_rep_set_flow_ctrl(rep_msg, param->flow_control); +		cm_rep_set_srq(rep_msg, param->srq); +		cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num)); +	} else { +		cm_rep_set_srq(rep_msg, 1); +		cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num)); +	} +  	if (param->private_data && param->private_data_len)  		memcpy(rep_msg->private_data, param->private_data,  		       param->private_data_len); @@ -1660,7 +1680,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,  	cm_id_priv->initiator_depth = param->initiator_depth;  	cm_id_priv->responder_resources = param->responder_resources;  	cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); -	cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg); +	cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);  out:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);  	return ret; @@ -1731,7 +1751,7 @@ error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);  }  EXPORT_SYMBOL(ib_send_cm_rtu); -static void cm_format_rep_event(struct cm_work *work) +static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)  {  	struct cm_rep_msg *rep_msg;  	struct ib_cm_rep_event_param *param; @@ -1740,7 +1760,7 @@ static void cm_format_rep_event(struct cm_work *work)  	param = &work->cm_event.param.rep_rcvd;  	param->remote_ca_guid = rep_msg->local_ca_guid;  	param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); -	param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg)); +	param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));  	param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));  	param->responder_resources = rep_msg->initiator_depth;  	param->initiator_depth = rep_msg->resp_resources; @@ -1808,7 +1828,7 @@ static int cm_rep_handler(struct cm_work *work)  		return -EINVAL;  	} -	cm_format_rep_event(work); +	cm_format_rep_event(work, cm_id_priv->qp_type);  	spin_lock_irq(&cm_id_priv->lock);  	switch (cm_id_priv->id.state) { @@ -1823,7 +1843,7 @@ static int cm_rep_handler(struct cm_work *work)  	cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;  	
cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; -	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg); +	cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);  	spin_lock(&cm.lock);  	/* Check for duplicate REP. */ @@ -1850,7 +1870,7 @@ static int cm_rep_handler(struct cm_work *work)  	cm_id_priv->id.state = IB_CM_REP_RCVD;  	cm_id_priv->id.remote_id = rep_msg->local_comm_id; -	cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg); +	cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);  	cm_id_priv->initiator_depth = rep_msg->resp_resources;  	cm_id_priv->responder_resources = rep_msg->initiator_depth;  	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); @@ -1988,6 +2008,10 @@ int ib_send_cm_dreq(struct ib_cm_id *cm_id,  		goto out;  	} +	if (cm_id->lap_state == IB_CM_LAP_SENT || +	    cm_id->lap_state == IB_CM_MRA_LAP_RCVD) +		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); +  	ret = cm_alloc_msg(cm_id_priv, &msg);  	if (ret) {  		cm_enter_timewait(cm_id_priv); @@ -2129,6 +2153,10 @@ static int cm_dreq_handler(struct cm_work *work)  		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);  		break;  	case IB_CM_ESTABLISHED: +		if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT || +		    cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD) +			ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg); +		break;  	case IB_CM_MRA_REP_RCVD:  		break;  	case IB_CM_TIMEWAIT: @@ -2349,9 +2377,18 @@ static int cm_rej_handler(struct cm_work *work)  		/* fall through */  	case IB_CM_REP_RCVD:  	case IB_CM_MRA_REP_SENT: -	case IB_CM_ESTABLISHED:  		cm_enter_timewait(cm_id_priv);  		break; +	case IB_CM_ESTABLISHED: +		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT || +		    cm_id_priv->id.lap_state == IB_CM_LAP_SENT) { +			if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT) +				ib_cancel_mad(cm_id_priv->av.port->mad_agent, +					      cm_id_priv->msg); +			cm_enter_timewait(cm_id_priv); +			break; +		} +		/* fall through */  	default:  		spin_unlock_irq(&cm_id_priv->lock);  		ret = -EINVAL; @@ -2989,6 +3026,7 @@ static int cm_sidr_req_handler(struct cm_work *work)  		goto out; /* No match. 
*/  	}  	atomic_inc(&cur_cm_id_priv->refcount); +	atomic_inc(&cm_id_priv->refcount);  	spin_unlock_irq(&cm.lock);  	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler; @@ -3471,10 +3509,37 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,  		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |  				IB_QP_DEST_QPN | IB_QP_RQ_PSN;  		qp_attr->ah_attr = cm_id_priv->av.ah_attr; +		if (!cm_id_priv->av.valid) { +			spin_unlock_irqrestore(&cm_id_priv->lock, flags); +			return -EINVAL; +		} +		if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { +			qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; +			*qp_attr_mask |= IB_QP_VID; +		} +		if (!is_zero_ether_addr(cm_id_priv->av.smac)) { +			memcpy(qp_attr->smac, cm_id_priv->av.smac, +			       sizeof(qp_attr->smac)); +			*qp_attr_mask |= IB_QP_SMAC; +		} +		if (cm_id_priv->alt_av.valid) { +			if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { +				qp_attr->alt_vlan_id = +					cm_id_priv->alt_av.ah_attr.vlan_id; +				*qp_attr_mask |= IB_QP_ALT_VID; +			} +			if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { +				memcpy(qp_attr->alt_smac, +				       cm_id_priv->alt_av.smac, +				       sizeof(qp_attr->alt_smac)); +				*qp_attr_mask |= IB_QP_ALT_SMAC; +			} +		}  		qp_attr->path_mtu = cm_id_priv->path_mtu;  		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);  		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); -		if (cm_id_priv->qp_type == IB_QPT_RC) { +		if (cm_id_priv->qp_type == IB_QPT_RC || +		    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {  			*qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |  					 IB_QP_MIN_RNR_TIMER;  			qp_attr->max_dest_rd_atomic = @@ -3519,15 +3584,21 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,  		if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {  			*qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;  			qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); -			if (cm_id_priv->qp_type == IB_QPT_RC) { -				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | -						 IB_QP_RNR_RETRY | +			switch (cm_id_priv->qp_type) { +			case IB_QPT_RC: +			case IB_QPT_XRC_INI: +				*qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |  						 IB_QP_MAX_QP_RD_ATOMIC; -				qp_attr->timeout = cm_id_priv->av.timeout;  				qp_attr->retry_cnt = cm_id_priv->retry_count;  				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; -				qp_attr->max_rd_atomic = -					cm_id_priv->initiator_depth; +				qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; +				/* fall through */ +			case IB_QPT_XRC_TGT: +				*qp_attr_mask |= IB_QP_TIMEOUT; +				qp_attr->timeout = cm_id_priv->av.timeout; +				break; +			default: +				break;  			}  			if (cm_id_priv->alt_av.ah_attr.dlid) {  				*qp_attr_mask |= IB_QP_PATH_MIG_STATE; @@ -3621,8 +3692,17 @@ static struct kobj_type cm_port_obj_type = {  	.release = cm_release_port_obj  }; +static char *cm_devnode(struct device *dev, umode_t *mode) +{ +	if (mode) +		*mode = 0666; +	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} +  struct class cm_class = { +	.owner   = THIS_MODULE,  	.name    = "infiniband_cm", +	.devnode = cm_devnode,  };  EXPORT_SYMBOL(cm_class); @@ -3797,28 +3877,31 @@ static int __init ib_cm_init(void)  	cm.remote_sidr_table = RB_ROOT;  	idr_init(&cm.local_id_table);  	get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); -	idr_pre_get(&cm.local_id_table, GFP_KERNEL);  	INIT_LIST_HEAD(&cm.timewait_list);  	ret = class_register(&cm_class); -	if (ret) -		return -ENOMEM; +	if (ret) { +		ret = -ENOMEM; +		goto error1; +	}  	cm.wq = 
create_workqueue("ib_cm");  	if (!cm.wq) {  		ret = -ENOMEM; -		goto error1; +		goto error2;  	}  	ret = ib_register_client(&cm_client);  	if (ret) -		goto error2; +		goto error3;  	return 0; -error2: +error3:  	destroy_workqueue(cm.wq); -error1: +error2:  	class_unregister(&cm_class); +error1: +	idr_destroy(&cm.local_id_table);  	return ret;  } diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7e63c08f697..be068f47e47 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -1,5 +1,5 @@  /* - * Copyright (c) 2004 Intel Corporation.  All rights reserved. + * Copyright (c) 2004, 2011 Intel Corporation.  All rights reserved.   * Copyright (c) 2004 Topspin Corporation.  All rights reserved.   * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.   * @@ -44,18 +44,6 @@  #define IB_CM_CLASS_VERSION	2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID		cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID		cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID		cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID		cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID		cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID		cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID		cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID	cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID	cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID		cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID		cpu_to_be16(0x001A) -  enum cm_msg_sequence {  	CM_MSG_SEQUENCE_REQ,  	CM_MSG_SEQUENCE_LAP, @@ -86,7 +74,7 @@ struct cm_req_msg {  	__be16 pkey;  	/* path MTU:4, RDC exists:1, RNR retry count:3. */  	u8 offset50; -	/* max CM Retries:4, SRQ:1, rsvd:3 */ +	/* max CM Retries:4, SRQ:1, extended transport type:3 */  	u8 offset51;  	__be16 primary_local_lid; @@ -175,6 +163,11 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)  	switch(transport_type) {  	case 0: return IB_QPT_RC;  	case 1: return IB_QPT_UC; +	case 3: +		switch (req_msg->offset51 & 0x7) { +		case 1: return IB_QPT_XRC_TGT; +		default: return 0; +		}  	default: return 0;  	}  } @@ -188,6 +181,12 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,  						  req_msg->offset40) &  						   0xFFFFFFF9) | 0x2);  		break; +	case IB_QPT_XRC_INI: +		req_msg->offset40 = cpu_to_be32((be32_to_cpu( +						 req_msg->offset40) & +						   0xFFFFFFF9) | 0x6); +		req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1; +		break;  	default:  		req_msg->offset40 = cpu_to_be32(be32_to_cpu(  						 req_msg->offset40) & @@ -527,6 +526,23 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)  			    (be32_to_cpu(rep_msg->offset12) & 0x000000FF));  } +static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg) +{ +	return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8); +} + +static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn) +{ +	rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) | +			    (be32_to_cpu(rep_msg->offset16) & 0x000000FF)); +} + +static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type) +{ +	return (qp_type == IB_QPT_XRC_INI) ? 
+		cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg); +} +  static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)  {  	return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); @@ -771,6 +787,7 @@ struct cm_apr_msg {  	u8 info_length;  	u8 ap_status; +	__be16 rsvd;  	u8 info[IB_CM_APR_INFO_LENGTH];  	u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 6884da24fde..d570030d899 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -41,12 +41,16 @@  #include <linux/idr.h>  #include <linux/inetdevice.h>  #include <linux/slab.h> +#include <linux/module.h> +#include <net/route.h>  #include <net/tcp.h>  #include <net/ipv6.h>  #include <rdma/rdma_cm.h>  #include <rdma/rdma_cm_ib.h> +#include <rdma/rdma_netlink.h> +#include <rdma/ib.h>  #include <rdma/ib_cache.h>  #include <rdma/ib_cm.h>  #include <rdma/ib_sa.h> @@ -76,10 +80,10 @@ static LIST_HEAD(dev_list);  static LIST_HEAD(listen_any_list);  static DEFINE_MUTEX(lock);  static struct workqueue_struct *cma_wq; -static DEFINE_IDR(sdp_ps);  static DEFINE_IDR(tcp_ps);  static DEFINE_IDR(udp_ps);  static DEFINE_IDR(ipoib_ps); +static DEFINE_IDR(ib_ps);  struct cma_device {  	struct list_head	list; @@ -89,26 +93,16 @@ struct cma_device {  	struct list_head	id_list;  }; -enum cma_state { -	CMA_IDLE, -	CMA_ADDR_QUERY, -	CMA_ADDR_RESOLVED, -	CMA_ROUTE_QUERY, -	CMA_ROUTE_RESOLVED, -	CMA_CONNECT, -	CMA_DISCONNECT, -	CMA_ADDR_BOUND, -	CMA_LISTEN, -	CMA_DEVICE_REMOVAL, -	CMA_DESTROYING -}; -  struct rdma_bind_list {  	struct idr		*ps;  	struct hlist_head	owners;  	unsigned short		port;  }; +enum { +	CMA_OPTION_AFONLY, +}; +  /*   * Device removal can occur at anytime, so we need extra handling to   * serialize notifying the user of device removal with other callbacks. 
@@ -126,7 +120,7 @@ struct rdma_id_private {  	struct list_head	mc_list;  	int			internal_id; -	enum cma_state		state; +	enum rdma_cm_state	state;  	spinlock_t		lock;  	struct mutex		qp_mutex; @@ -146,8 +140,12 @@ struct rdma_id_private {  	u32			seq_num;  	u32			qkey;  	u32			qp_num; +	pid_t			owner; +	u32			options;  	u8			srq;  	u8			tos; +	u8			reuseaddr; +	u8			afonly;  };  struct cma_multicast { @@ -164,8 +162,8 @@ struct cma_multicast {  struct cma_work {  	struct work_struct	work;  	struct rdma_id_private	*id; -	enum cma_state		old_state; -	enum cma_state		new_state; +	enum rdma_cm_state	old_state; +	enum rdma_cm_state	new_state;  	struct rdma_cm_event	event;  }; @@ -197,26 +195,9 @@ struct cma_hdr {  	union cma_ip_addr dst_addr;  }; -struct sdp_hh { -	u8 bsdh[16]; -	u8 sdp_version; /* Major version: 7:4 */ -	u8 ip_version;	/* IP version: 7:4 */ -	u8 sdp_specific1[10]; -	__be16 port; -	__be16 sdp_specific2; -	union cma_ip_addr src_addr; -	union cma_ip_addr dst_addr; -}; - -struct sdp_hah { -	u8 bsdh[16]; -	u8 sdp_version; -}; -  #define CMA_VERSION 0x00 -#define SDP_MAJ_VERSION 0x2 -static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp) +static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)  {  	unsigned long flags;  	int ret; @@ -228,7 +209,7 @@ static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)  }  static int cma_comp_exch(struct rdma_id_private *id_priv, -			 enum cma_state comp, enum cma_state exch) +			 enum rdma_cm_state comp, enum rdma_cm_state exch)  {  	unsigned long flags;  	int ret; @@ -240,11 +221,11 @@ static int cma_comp_exch(struct rdma_id_private *id_priv,  	return ret;  } -static enum cma_state cma_exch(struct rdma_id_private *id_priv, -			       enum cma_state exch) +static enum rdma_cm_state cma_exch(struct rdma_id_private *id_priv, +				   enum rdma_cm_state exch)  {  	unsigned long flags; -	enum cma_state old; +	enum rdma_cm_state old;  	spin_lock_irqsave(&id_priv->lock, flags);  	old = id_priv->state; @@ -263,26 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)  	hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);  } -static inline u8 sdp_get_majv(u8 sdp_version) -{ -	return sdp_version >> 4; -} - -static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) -{ -	return hh->ip_version >> 4; -} - -static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) -{ -	hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); -} - -static inline int cma_is_ud_ps(enum rdma_port_space ps) -{ -	return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB); -} -  static void cma_attach_to_dev(struct rdma_id_private *id_priv,  			      struct cma_device *cma_dev)  { @@ -308,23 +269,49 @@ static inline void release_mc(struct kref *kref)  	kfree(mc);  } -static void cma_detach_from_dev(struct rdma_id_private *id_priv) +static void cma_release_dev(struct rdma_id_private *id_priv)  { +	mutex_lock(&lock);  	list_del(&id_priv->list);  	cma_deref_dev(id_priv->cma_dev);  	id_priv->cma_dev = NULL; +	mutex_unlock(&lock); +} + +static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ +	return (struct sockaddr *) &id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ +	return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; +} + +static inline unsigned short cma_family(struct rdma_id_private *id_priv) +{ +	return id_priv->id.route.addr.src_addr.ss_family;  } -static int cma_set_qkey(struct rdma_id_private *id_priv) 
+static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)  {  	struct ib_sa_mcmember_rec rec;  	int ret = 0; -	if (id_priv->qkey) +	if (id_priv->qkey) { +		if (qkey && id_priv->qkey != qkey) +			return -EINVAL;  		return 0; +	} + +	if (qkey) { +		id_priv->qkey = qkey; +		return 0; +	}  	switch (id_priv->id.ps) {  	case RDMA_PS_UDP: +	case RDMA_PS_IB:  		id_priv->qkey = RDMA_UDP_QKEY;  		break;  	case RDMA_PS_IPOIB: @@ -341,55 +328,83 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)  	return ret;  } -static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)  { -	int i; -	int err; -	struct ib_port_attr props; -	union ib_gid tmp; +	dev_addr->dev_type = ARPHRD_INFINIBAND; +	rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); +	ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); +} -	err = ib_query_port(device, port_num, &props); -	if (err) -		return 1; +static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ +	int ret; -	for (i = 0; i < props.gid_tbl_len; ++i) { -		err = ib_query_gid(device, port_num, i, &tmp); -		if (err) -			return 1; -		if (!memcmp(&tmp, gid, sizeof tmp)) -			return 0; +	if (addr->sa_family != AF_IB) { +		ret = rdma_translate_ip(addr, dev_addr, NULL); +	} else { +		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); +		ret = 0;  	} -	return -EAGAIN; +	return ret;  } -static int cma_acquire_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv, +			   struct rdma_id_private *listen_id_priv)  {  	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;  	struct cma_device *cma_dev;  	union ib_gid gid, iboe_gid;  	int ret = -ENODEV; -	u8 port; +	u8 port, found_port;  	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?  		
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; -	iboe_addr_get_sgid(dev_addr, &iboe_gid); +	if (dev_ll != IB_LINK_LAYER_INFINIBAND && +	    id_priv->id.ps == RDMA_PS_IPOIB) +		return -EINVAL; + +	mutex_lock(&lock); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &iboe_gid); +  	memcpy(&gid, dev_addr->src_dev_addr +  	       rdma_addr_gid_offset(dev_addr), sizeof gid); +	if (listen_id_priv && +	    rdma_port_get_link_layer(listen_id_priv->id.device, +				     listen_id_priv->id.port_num) == dev_ll) { +		cma_dev = listen_id_priv->cma_dev; +		port = listen_id_priv->id.port_num; +		if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && +		    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) +			ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, +						 &found_port, NULL); +		else +			ret = ib_find_cached_gid(cma_dev->device, &gid, +						 &found_port, NULL); + +		if (!ret && (port  == found_port)) { +			id_priv->id.port_num = found_port; +			goto out; +		} +	}  	list_for_each_entry(cma_dev, &dev_list, list) {  		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { +			if (listen_id_priv && +			    listen_id_priv->cma_dev == cma_dev && +			    listen_id_priv->id.port_num == port) +				continue;  			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {  				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&  				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) -					ret = find_gid_port(cma_dev->device, &iboe_gid, port); +					ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);  				else -					ret = find_gid_port(cma_dev->device, &gid, port); +					ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); -				if (!ret) { -					id_priv->id.port_num = port; +				if (!ret && (port == found_port)) { +					id_priv->id.port_num = found_port;  					goto out; -				} else if (ret == 1) -					break; +				}  			}  		}  	} @@ -398,9 +413,64 @@ out:  	if (!ret)  		cma_attach_to_dev(id_priv, cma_dev); +	mutex_unlock(&lock);  	return ret;  } +/* + * Select the source IB device and address to reach the destination IB address. 
+ */ +static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) +{ +	struct cma_device *cma_dev, *cur_dev; +	struct sockaddr_ib *addr; +	union ib_gid gid, sgid, *dgid; +	u16 pkey, index; +	u8 p; +	int i; + +	cma_dev = NULL; +	addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); +	dgid = (union ib_gid *) &addr->sib_addr; +	pkey = ntohs(addr->sib_pkey); + +	list_for_each_entry(cur_dev, &dev_list, list) { +		if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) +			continue; + +		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { +			if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) +				continue; + +			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { +				if (!memcmp(&gid, dgid, sizeof(gid))) { +					cma_dev = cur_dev; +					sgid = gid; +					id_priv->id.port_num = p; +					goto found; +				} + +				if (!cma_dev && (gid.global.subnet_prefix == +						 dgid->global.subnet_prefix)) { +					cma_dev = cur_dev; +					sgid = gid; +					id_priv->id.port_num = p; +				} +			} +		} +	} + +	if (!cma_dev) +		return -ENODEV; + +found: +	cma_attach_to_dev(id_priv, cma_dev); +	addr = (struct sockaddr_ib *) cma_src_addr(id_priv); +	memcpy(&addr->sib_addr, &sgid, sizeof sgid); +	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); +	return 0; +} +  static void cma_deref_id(struct rdma_id_private *id_priv)  {  	if (atomic_dec_and_test(&id_priv->refcount)) @@ -408,7 +478,7 @@ static void cma_deref_id(struct rdma_id_private *id_priv)  }  static int cma_disable_callback(struct rdma_id_private *id_priv, -			      enum cma_state state) +				enum rdma_cm_state state)  {  	mutex_lock(&id_priv->handler_mutex);  	if (id_priv->state != state) { @@ -418,13 +488,9 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,  	return 0;  } -static int cma_has_cm_dev(struct rdma_id_private *id_priv) -{ -	return (id_priv->id.device && id_priv->cm_id.ib); -} -  struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, -				  void *context, enum rdma_port_space ps) +				  void *context, enum rdma_port_space ps, +				  enum ib_qp_type qp_type)  {  	struct rdma_id_private *id_priv; @@ -432,10 +498,12 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,  	if (!id_priv)  		return ERR_PTR(-ENOMEM); -	id_priv->state = CMA_IDLE; +	id_priv->owner = task_pid_nr(current); +	id_priv->state = RDMA_CM_IDLE;  	id_priv->id.context = context;  	id_priv->id.event_handler = event_handler;  	id_priv->id.ps = ps; +	id_priv->id.qp_type = qp_type;  	spin_lock_init(&id_priv->lock);  	mutex_init(&id_priv->qp_mutex);  	init_completion(&id_priv->comp); @@ -503,7 +571,7 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,  	if (IS_ERR(qp))  		return PTR_ERR(qp); -	if (cma_is_ud_ps(id_priv->id.ps)) +	if (id->qp_type == IB_QPT_UD)  		ret = cma_init_ud_qp(id_priv, qp);  	else  		ret = cma_init_conn_qp(id_priv, qp); @@ -537,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,  {  	struct ib_qp_attr qp_attr;  	int qp_attr_mask, ret; +	union ib_gid sgid;  	mutex_lock(&id_priv->qp_mutex);  	if (!id_priv->id.qp) { @@ -559,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,  	if (ret)  		goto out; +	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, +			   qp_attr.ah_attr.grh.sgid_index, &sgid); +	if (ret) +		goto out; + +	if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) +	    == RDMA_TRANSPORT_IB && +	    rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) +	  
  == IB_LINK_LAYER_ETHERNET) { +		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); + +		if (ret) +			goto out; +	}  	if (conn_param)  		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;  	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); @@ -631,8 +714,8 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,  	qp_attr->port_num = id_priv->id.port_num;  	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; -	if (cma_is_ud_ps(id_priv->id.ps)) { -		ret = cma_set_qkey(id_priv); +	if (id_priv->id.qp_type == IB_QPT_UD) { +		ret = cma_set_qkey(id_priv, 0);  		if (ret)  			return ret; @@ -654,11 +737,12 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,  	id_priv = container_of(id, struct rdma_id_private, id);  	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {  	case RDMA_TRANSPORT_IB: -		if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps)) +		if (!id_priv->cm_id.ib || (id_priv->id.qp_type == IB_QPT_UD))  			ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);  		else  			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,  						 qp_attr_mask); +  		if (qp_attr->qp_state == IB_QPS_RTR)  			qp_attr->rq_psn = id_priv->seq_num;  		break; @@ -681,26 +765,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr);  static inline int cma_zero_addr(struct sockaddr *addr)  { -	struct in6_addr *ip6; - -	if (addr->sa_family == AF_INET) -		return ipv4_is_zeronet( -			((struct sockaddr_in *)addr)->sin_addr.s_addr); -	else { -		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; -		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | -			ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; +	switch (addr->sa_family) { +	case AF_INET: +		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); +	case AF_INET6: +		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); +	case AF_IB: +		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); +	default: +		return 0;  	}  }  static inline int cma_loopback_addr(struct sockaddr *addr)  { -	if (addr->sa_family == AF_INET) -		return ipv4_is_loopback( -			((struct sockaddr_in *) addr)->sin_addr.s_addr); -	else -		return ipv6_addr_loopback( -			&((struct sockaddr_in6 *) addr)->sin6_addr); +	switch (addr->sa_family) { +	case AF_INET: +		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); +	case AF_INET6: +		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); +	case AF_IB: +		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); +	default: +		return 0; +	}  }  static inline int cma_any_addr(struct sockaddr *addr) @@ -708,12 +796,40 @@ static inline int cma_any_addr(struct sockaddr *addr)  	return cma_zero_addr(addr) || cma_loopback_addr(addr);  } -static inline __be16 cma_port(struct sockaddr *addr) +static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)  { -	if (addr->sa_family == AF_INET) +	if (src->sa_family != dst->sa_family) +		return -1; + +	switch (src->sa_family) { +	case AF_INET: +		return ((struct sockaddr_in *) src)->sin_addr.s_addr != +		       ((struct sockaddr_in *) dst)->sin_addr.s_addr; +	case AF_INET6: +		return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, +				     &((struct sockaddr_in6 *) dst)->sin6_addr); +	default: +		return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, +				   &((struct sockaddr_ib *) dst)->sib_addr); +	} +} + +static __be16 cma_port(struct sockaddr *addr) +{ +	struct sockaddr_ib *sib; + +	switch (addr->sa_family) { +	case AF_INET:  		return 
((struct sockaddr_in *) addr)->sin_port; -	else +	case AF_INET6:  		return ((struct sockaddr_in6 *) addr)->sin6_port; +	case AF_IB: +		sib = (struct sockaddr_ib *) addr; +		return htons((u16) (be64_to_cpu(sib->sib_sid) & +				    be64_to_cpu(sib->sib_sid_mask))); +	default: +		return 0; +	}  }  static inline int cma_any_port(struct sockaddr *addr) @@ -721,83 +837,93 @@ static inline int cma_any_port(struct sockaddr *addr)  	return !cma_port(addr);  } -static int cma_get_net_info(void *hdr, enum rdma_port_space ps, -			    u8 *ip_ver, __be16 *port, -			    union cma_ip_addr **src, union cma_ip_addr **dst) +static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, +			     struct ib_sa_path_rec *path)  { -	switch (ps) { -	case RDMA_PS_SDP: -		if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != -		    SDP_MAJ_VERSION) -			return -EINVAL; - -		*ip_ver	= sdp_get_ip_ver(hdr); -		*port	= ((struct sdp_hh *) hdr)->port; -		*src	= &((struct sdp_hh *) hdr)->src_addr; -		*dst	= &((struct sdp_hh *) hdr)->dst_addr; -		break; -	default: -		if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) -			return -EINVAL; +	struct sockaddr_ib *listen_ib, *ib; -		*ip_ver	= cma_get_ip_ver(hdr); -		*port	= ((struct cma_hdr *) hdr)->port; -		*src	= &((struct cma_hdr *) hdr)->src_addr; -		*dst	= &((struct cma_hdr *) hdr)->dst_addr; -		break; -	} +	listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; +	ib = (struct sockaddr_ib *) &id->route.addr.src_addr; +	ib->sib_family = listen_ib->sib_family; +	ib->sib_pkey = path->pkey; +	ib->sib_flowinfo = path->flow_label; +	memcpy(&ib->sib_addr, &path->sgid, 16); +	ib->sib_sid = listen_ib->sib_sid; +	ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); +	ib->sib_scope_id = listen_ib->sib_scope_id; -	if (*ip_ver != 4 && *ip_ver != 6) -		return -EINVAL; -	return 0; +	ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; +	ib->sib_family = listen_ib->sib_family; +	ib->sib_pkey = path->pkey; +	ib->sib_flowinfo = path->flow_label; +	memcpy(&ib->sib_addr, &path->dgid, 16);  } -static void cma_save_net_info(struct rdma_addr *addr, -			      struct rdma_addr *listen_addr, -			      u8 ip_ver, __be16 port, -			      union cma_ip_addr *src, union cma_ip_addr *dst) +static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, +			      struct cma_hdr *hdr)  {  	struct sockaddr_in *listen4, *ip4; + +	listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; +	ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; +	ip4->sin_family = listen4->sin_family; +	ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; +	ip4->sin_port = listen4->sin_port; + +	ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; +	ip4->sin_family = listen4->sin_family; +	ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; +	ip4->sin_port = hdr->port; +} + +static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, +			      struct cma_hdr *hdr) +{  	struct sockaddr_in6 *listen6, *ip6; -	switch (ip_ver) { +	listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; +	ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; +	ip6->sin6_family = listen6->sin6_family; +	ip6->sin6_addr = hdr->dst_addr.ip6; +	ip6->sin6_port = listen6->sin6_port; + +	ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; +	ip6->sin6_family = listen6->sin6_family; +	ip6->sin6_addr = hdr->src_addr.ip6; +	ip6->sin6_port = hdr->port; +} + +static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, +			     struct 
ib_cm_event *ib_event) +{ +	struct cma_hdr *hdr; + +	if ((listen_id->route.addr.src_addr.ss_family == AF_IB) && +	    (ib_event->event == IB_CM_REQ_RECEIVED)) { +		cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); +		return 0; +	} + +	hdr = ib_event->private_data; +	if (hdr->cma_version != CMA_VERSION) +		return -EINVAL; + +	switch (cma_get_ip_ver(hdr)) {  	case 4: -		listen4 = (struct sockaddr_in *) &listen_addr->src_addr; -		ip4 = (struct sockaddr_in *) &addr->src_addr; -		ip4->sin_family = listen4->sin_family; -		ip4->sin_addr.s_addr = dst->ip4.addr; -		ip4->sin_port = listen4->sin_port; - -		ip4 = (struct sockaddr_in *) &addr->dst_addr; -		ip4->sin_family = listen4->sin_family; -		ip4->sin_addr.s_addr = src->ip4.addr; -		ip4->sin_port = port; +		cma_save_ip4_info(id, listen_id, hdr);  		break;  	case 6: -		listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; -		ip6 = (struct sockaddr_in6 *) &addr->src_addr; -		ip6->sin6_family = listen6->sin6_family; -		ip6->sin6_addr = dst->ip6; -		ip6->sin6_port = listen6->sin6_port; - -		ip6 = (struct sockaddr_in6 *) &addr->dst_addr; -		ip6->sin6_family = listen6->sin6_family; -		ip6->sin6_addr = src->ip6; -		ip6->sin6_port = port; +		cma_save_ip6_info(id, listen_id, hdr);  		break;  	default: -		break; +		return -EINVAL;  	} +	return 0;  } -static inline int cma_user_data_offset(enum rdma_port_space ps) +static inline int cma_user_data_offset(struct rdma_id_private *id_priv)  { -	switch (ps) { -	case RDMA_PS_SDP: -		return 0; -	default: -		return sizeof(struct cma_hdr); -	} +	return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr);  }  static void cma_cancel_route(struct rdma_id_private *id_priv) @@ -838,18 +964,17 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv)  }  static void cma_cancel_operation(struct rdma_id_private *id_priv, -				 enum cma_state state) +				 enum rdma_cm_state state)  {  	switch (state) { -	case CMA_ADDR_QUERY: +	case RDMA_CM_ADDR_QUERY:  		rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);  		break; -	case CMA_ROUTE_QUERY: +	case RDMA_CM_ROUTE_QUERY:  		cma_cancel_route(id_priv);  		break; -	case CMA_LISTEN: -		if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) -				&& !id_priv->cma_dev) +	case RDMA_CM_LISTEN: +		if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev)  			cma_cancel_listens(id_priv);  		break;  	default: @@ -898,32 +1023,35 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)  void rdma_destroy_id(struct rdma_cm_id *id)  {  	struct rdma_id_private *id_priv; -	enum cma_state state; +	enum rdma_cm_state state;  	id_priv = container_of(id, struct rdma_id_private, id); -	state = cma_exch(id_priv, CMA_DESTROYING); +	state = cma_exch(id_priv, RDMA_CM_DESTROYING);  	cma_cancel_operation(id_priv, state); -	mutex_lock(&lock); +	/* +	 * Wait for any active callback to finish.  New callbacks will find +	 * the id_priv state set to destroying and abort. 
+	 */ +	mutex_lock(&id_priv->handler_mutex); +	mutex_unlock(&id_priv->handler_mutex); +  	if (id_priv->cma_dev) { -		mutex_unlock(&lock);  		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {  		case RDMA_TRANSPORT_IB: -			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) +			if (id_priv->cm_id.ib)  				ib_destroy_cm_id(id_priv->cm_id.ib);  			break;  		case RDMA_TRANSPORT_IWARP: -			if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw)) +			if (id_priv->cm_id.iw)  				iw_destroy_cm_id(id_priv->cm_id.iw);  			break;  		default:  			break;  		}  		cma_leave_mc_groups(id_priv); -		mutex_lock(&lock); -		cma_detach_from_dev(id_priv); +		cma_release_dev(id_priv);  	} -	mutex_unlock(&lock);  	cma_release_port(id_priv);  	cma_deref_id(id_priv); @@ -961,16 +1089,6 @@ reject:  	return ret;  } -static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) -{ -	if (id_priv->id.ps == RDMA_PS_SDP && -	    sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != -	    SDP_MAJ_VERSION) -		return -EINVAL; - -	return 0; -} -  static void cma_set_rep_event_data(struct rdma_cm_event *event,  				   struct ib_cm_rep_event_param *rep_data,  				   void *private_data) @@ -992,9 +1110,9 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	int ret = 0;  	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && -		cma_disable_callback(id_priv, CMA_CONNECT)) || +		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||  	    (ib_event->event == IB_CM_TIMEWAIT_EXIT && -		cma_disable_callback(id_priv, CMA_DISCONNECT))) +		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))  		return 0;  	memset(&event, 0, sizeof event); @@ -1005,15 +1123,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  		event.status = -ETIMEDOUT;  		break;  	case IB_CM_REP_RECEIVED: -		event.status = cma_verify_rep(id_priv, ib_event->private_data); -		if (event.status) -			event.event = RDMA_CM_EVENT_CONNECT_ERROR; -		else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { +		if (id_priv->id.qp) {  			event.status = cma_rep_recv(id_priv);  			event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :  						     RDMA_CM_EVENT_ESTABLISHED; -		} else +		} else {  			event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; +		}  		cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,  				       ib_event->private_data);  		break; @@ -1025,7 +1141,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  		event.status = -ETIMEDOUT; /* fall through */  	case IB_CM_DREQ_RECEIVED:  	case IB_CM_DREP_RECEIVED: -		if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT)) +		if (!cma_comp_exch(id_priv, RDMA_CM_CONNECT, +				   RDMA_CM_DISCONNECT))  			goto out;  		event.event = RDMA_CM_EVENT_DISCONNECTED;  		break; @@ -1052,7 +1169,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	if (ret) {  		/* Destroy the CM ID by returning a non-zero value. 
*/  		id_priv->cm_id.ib = NULL; -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		mutex_unlock(&id_priv->handler_mutex);  		rdma_destroy_id(&id_priv->id);  		return ret; @@ -1068,53 +1185,44 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,  	struct rdma_id_private *id_priv;  	struct rdma_cm_id *id;  	struct rdma_route *rt; -	union cma_ip_addr *src, *dst; -	__be16 port; -	u8 ip_ver;  	int ret; -	if (cma_get_net_info(ib_event->private_data, listen_id->ps, -			     &ip_ver, &port, &src, &dst)) -		goto err; -  	id = rdma_create_id(listen_id->event_handler, listen_id->context, -			    listen_id->ps); +			    listen_id->ps, ib_event->param.req_rcvd.qp_type);  	if (IS_ERR(id)) -		goto err; +		return NULL; -	cma_save_net_info(&id->route.addr, &listen_id->route.addr, -			  ip_ver, port, src, dst); +	id_priv = container_of(id, struct rdma_id_private, id); +	if (cma_save_net_info(id, listen_id, ib_event)) +		goto err;  	rt = &id->route;  	rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;  	rt->path_rec = kmalloc(sizeof *rt->path_rec * rt->num_paths,  			       GFP_KERNEL);  	if (!rt->path_rec) -		goto destroy_id; +		goto err;  	rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;  	if (rt->num_paths == 2)  		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; -	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { +	if (cma_any_addr(cma_src_addr(id_priv))) {  		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;  		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); -		ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); +		ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));  	} else { -		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, -					&rt->addr.dev_addr); +		ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr);  		if (ret) -			goto destroy_id; +			goto err;  	}  	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); -	id_priv = container_of(id, struct rdma_id_private, id); -	id_priv->state = CMA_CONNECT; +	id_priv->state = RDMA_CM_CONNECT;  	return id_priv; -destroy_id: -	rdma_destroy_id(id);  err: +	rdma_destroy_id(id);  	return NULL;  } @@ -1123,33 +1231,24 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,  {  	struct rdma_id_private *id_priv;  	struct rdma_cm_id *id; -	union cma_ip_addr *src, *dst; -	__be16 port; -	u8 ip_ver;  	int ret;  	id = rdma_create_id(listen_id->event_handler, listen_id->context, -			    listen_id->ps); +			    listen_id->ps, IB_QPT_UD);  	if (IS_ERR(id))  		return NULL; - -	if (cma_get_net_info(ib_event->private_data, listen_id->ps, -			     &ip_ver, &port, &src, &dst)) +	id_priv = container_of(id, struct rdma_id_private, id); +	if (cma_save_net_info(id, listen_id, ib_event))  		goto err; -	cma_save_net_info(&id->route.addr, &listen_id->route.addr, -			  ip_ver, port, src, dst); -  	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { -		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, -					&id->route.addr.dev_addr); +		ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr);  		if (ret)  			goto err;  	} -	id_priv = container_of(id, struct rdma_id_private, id); -	id_priv->state = CMA_CONNECT; +	id_priv->state = RDMA_CM_CONNECT;  	return id_priv;  err:  	rdma_destroy_id(id); @@ -1171,6 +1270,15 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,  	event->param.conn.qp_num = req_data->remote_qpn;  } 
+static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) +{ +	return (((ib_event->event == IB_CM_REQ_RECEIVED) && +		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) || +		((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && +		 (id->qp_type == IB_QPT_UD)) || +		(!id->qp_type)); +} +  static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  {  	struct rdma_id_private *listen_id, *conn_id; @@ -1178,13 +1286,16 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	int offset, ret;  	listen_id = cm_id->context; -	if (cma_disable_callback(listen_id, CMA_LISTEN)) +	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) +		return -EINVAL; + +	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))  		return -ECONNABORTED;  	memset(&event, 0, sizeof event); -	offset = cma_user_data_offset(listen_id->id.ps); +	offset = cma_user_data_offset(listen_id);  	event.event = RDMA_CM_EVENT_CONNECT_REQUEST; -	if (cma_is_ud_ps(listen_id->id.ps)) { +	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {  		conn_id = cma_new_udp_id(&listen_id->id, ib_event);  		event.param.ud.private_data = ib_event->private_data + offset;  		event.param.ud.private_data_len = @@ -1196,93 +1307,89 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	}  	if (!conn_id) {  		ret = -ENOMEM; -		goto out; +		goto err1;  	}  	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); -	mutex_lock(&lock); -	ret = cma_acquire_dev(conn_id); -	mutex_unlock(&lock); +	ret = cma_acquire_dev(conn_id, listen_id);  	if (ret) -		goto release_conn_id; +		goto err2;  	conn_id->cm_id.ib = cm_id;  	cm_id->context = conn_id;  	cm_id->cm_handler = cma_ib_handler; +	/* +	 * Protect against the user destroying conn_id from another thread +	 * until we're done accessing it. +	 */ +	atomic_inc(&conn_id->refcount);  	ret = conn_id->id.event_handler(&conn_id->id, &event); -	if (!ret) { -		/* -		 * Acquire mutex to prevent user executing rdma_destroy_id() -		 * while we're accessing the cm_id. -		 */ -		mutex_lock(&lock); -		if (cma_comp(conn_id, CMA_CONNECT) && -		    !cma_is_ud_ps(conn_id->id.ps)) -			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); -		mutex_unlock(&lock); -		mutex_unlock(&conn_id->handler_mutex); -		goto out; -	} +	if (ret) +		goto err3; +	/* +	 * Acquire mutex to prevent user executing rdma_destroy_id() +	 * while we're accessing the cm_id. +	 */ +	mutex_lock(&lock); +	if (cma_comp(conn_id, RDMA_CM_CONNECT) && +	    (conn_id->id.qp_type != IB_QPT_UD)) +		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); +	mutex_unlock(&lock); +	mutex_unlock(&conn_id->handler_mutex); +	mutex_unlock(&listen_id->handler_mutex); +	cma_deref_id(conn_id); +	return 0; +err3: +	cma_deref_id(conn_id);  	/* Destroy the CM ID by returning a non-zero value. 
*/  	conn_id->cm_id.ib = NULL; - -release_conn_id: -	cma_exch(conn_id, CMA_DESTROYING); +err2: +	cma_exch(conn_id, RDMA_CM_DESTROYING);  	mutex_unlock(&conn_id->handler_mutex); -	rdma_destroy_id(&conn_id->id); - -out: +err1:  	mutex_unlock(&listen_id->handler_mutex); +	if (conn_id) +		rdma_destroy_id(&conn_id->id);  	return ret;  } -static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr)  { -	return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); +	if (addr->sa_family == AF_IB) +		return ((struct sockaddr_ib *) addr)->sib_sid; + +	return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr)));  } +EXPORT_SYMBOL(rdma_get_service_id);  static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,  				 struct ib_cm_compare_data *compare)  {  	struct cma_hdr *cma_data, *cma_mask; -	struct sdp_hh *sdp_data, *sdp_mask;  	__be32 ip4_addr;  	struct in6_addr ip6_addr;  	memset(compare, 0, sizeof *compare);  	cma_data = (void *) compare->data;  	cma_mask = (void *) compare->mask; -	sdp_data = (void *) compare->data; -	sdp_mask = (void *) compare->mask;  	switch (addr->sa_family) {  	case AF_INET:  		ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; -		if (ps == RDMA_PS_SDP) { -			sdp_set_ip_ver(sdp_data, 4); -			sdp_set_ip_ver(sdp_mask, 0xF); -			sdp_data->dst_addr.ip4.addr = ip4_addr; -			sdp_mask->dst_addr.ip4.addr = htonl(~0); -		} else { -			cma_set_ip_ver(cma_data, 4); -			cma_set_ip_ver(cma_mask, 0xF); +		cma_set_ip_ver(cma_data, 4); +		cma_set_ip_ver(cma_mask, 0xF); +		if (!cma_any_addr(addr)) {  			cma_data->dst_addr.ip4.addr = ip4_addr;  			cma_mask->dst_addr.ip4.addr = htonl(~0);  		}  		break;  	case AF_INET6:  		ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; -		if (ps == RDMA_PS_SDP) { -			sdp_set_ip_ver(sdp_data, 6); -			sdp_set_ip_ver(sdp_mask, 0xF); -			sdp_data->dst_addr.ip6 = ip6_addr; -			memset(&sdp_mask->dst_addr.ip6, 0xFF, -			       sizeof sdp_mask->dst_addr.ip6); -		} else { -			cma_set_ip_ver(cma_data, 6); -			cma_set_ip_ver(cma_mask, 0xF); +		cma_set_ip_ver(cma_data, 6); +		cma_set_ip_ver(cma_mask, 0xF); +		if (!cma_any_addr(addr)) {  			cma_data->dst_addr.ip6 = ip6_addr;  			memset(&cma_mask->dst_addr.ip6, 0xFF,  			       sizeof cma_mask->dst_addr.ip6); @@ -1297,10 +1404,11 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)  {  	struct rdma_id_private *id_priv = iw_id->context;  	struct rdma_cm_event event; -	struct sockaddr_in *sin;  	int ret = 0; +	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; +	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; -	if (cma_disable_callback(id_priv, CMA_CONNECT)) +	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))  		return 0;  	memset(&event, 0, sizeof event); @@ -1309,13 +1417,15 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)  		event.event = RDMA_CM_EVENT_DISCONNECTED;  		break;  	case IW_CM_EVENT_CONNECT_REPLY: -		sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; -		*sin = iw_event->local_addr; -		sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; -		*sin = iw_event->remote_addr; +		memcpy(cma_src_addr(id_priv), laddr, +		       rdma_addr_size(laddr)); +		memcpy(cma_dst_addr(id_priv), raddr, +		       rdma_addr_size(raddr));  		switch (iw_event->status) {  		case 0:  			event.event = RDMA_CM_EVENT_ESTABLISHED; +			event.param.conn.initiator_depth = 
iw_event->ird; +			event.param.conn.responder_resources = iw_event->ord;  			break;  		case -ECONNRESET:  		case -ECONNREFUSED: @@ -1331,6 +1441,8 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)  		break;  	case IW_CM_EVENT_ESTABLISHED:  		event.event = RDMA_CM_EVENT_ESTABLISHED; +		event.param.conn.initiator_depth = iw_event->ird; +		event.param.conn.responder_resources = iw_event->ord;  		break;  	default:  		BUG_ON(1); @@ -1343,7 +1455,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)  	if (ret) {  		/* Destroy the CM ID by returning a non-zero value. */  		id_priv->cm_id.iw = NULL; -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		mutex_unlock(&id_priv->handler_mutex);  		rdma_destroy_id(&id_priv->id);  		return ret; @@ -1358,45 +1470,36 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  {  	struct rdma_cm_id *new_cm_id;  	struct rdma_id_private *listen_id, *conn_id; -	struct sockaddr_in *sin; -	struct net_device *dev = NULL;  	struct rdma_cm_event event;  	int ret;  	struct ib_device_attr attr; +	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; +	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;  	listen_id = cm_id->context; -	if (cma_disable_callback(listen_id, CMA_LISTEN)) +	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))  		return -ECONNABORTED;  	/* Create a new RDMA id for the new IW CM ID */  	new_cm_id = rdma_create_id(listen_id->id.event_handler,  				   listen_id->id.context, -				   RDMA_PS_TCP); +				   RDMA_PS_TCP, IB_QPT_RC);  	if (IS_ERR(new_cm_id)) {  		ret = -ENOMEM;  		goto out;  	}  	conn_id = container_of(new_cm_id, struct rdma_id_private, id);  	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); -	conn_id->state = CMA_CONNECT; +	conn_id->state = RDMA_CM_CONNECT; -	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); -	if (!dev) { -		ret = -EADDRNOTAVAIL; -		mutex_unlock(&conn_id->handler_mutex); -		rdma_destroy_id(new_cm_id); -		goto out; -	} -	ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); +	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);  	if (ret) {  		mutex_unlock(&conn_id->handler_mutex);  		rdma_destroy_id(new_cm_id);  		goto out;  	} -	mutex_lock(&lock); -	ret = cma_acquire_dev(conn_id); -	mutex_unlock(&lock); +	ret = cma_acquire_dev(conn_id, listen_id);  	if (ret) {  		mutex_unlock(&conn_id->handler_mutex);  		rdma_destroy_id(new_cm_id); @@ -1407,10 +1510,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  	cm_id->context = conn_id;  	cm_id->cm_handler = cma_iw_handler; -	sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; -	*sin = iw_event->local_addr; -	sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; -	*sin = iw_event->remote_addr; +	memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); +	memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));  	ret = ib_query_device(conn_id->id.device, &attr);  	if (ret) { @@ -1423,23 +1524,29 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;  	event.param.conn.private_data = iw_event->private_data;  	event.param.conn.private_data_len = iw_event->private_data_len; -	event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; -	event.param.conn.responder_resources = attr.max_qp_rd_atom; +	event.param.conn.initiator_depth = iw_event->ird; +	event.param.conn.responder_resources = iw_event->ord; + +	/* +	
 * Protect against the user destroying conn_id from another thread +	 * until we're done accessing it. +	 */ +	atomic_inc(&conn_id->refcount);  	ret = conn_id->id.event_handler(&conn_id->id, &event);  	if (ret) {  		/* User wants to destroy the CM ID */  		conn_id->cm_id.iw = NULL; -		cma_exch(conn_id, CMA_DESTROYING); +		cma_exch(conn_id, RDMA_CM_DESTROYING);  		mutex_unlock(&conn_id->handler_mutex); +		cma_deref_id(conn_id);  		rdma_destroy_id(&conn_id->id);  		goto out;  	}  	mutex_unlock(&conn_id->handler_mutex); +	cma_deref_id(conn_id);  out: -	if (dev) -		dev_put(dev);  	mutex_unlock(&listen_id->handler_mutex);  	return ret;  } @@ -1448,17 +1555,19 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)  {  	struct ib_cm_compare_data compare_data;  	struct sockaddr *addr; +	struct ib_cm_id	*id;  	__be64 svc_id;  	int ret; -	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler, -					    id_priv); -	if (IS_ERR(id_priv->cm_id.ib)) -		return PTR_ERR(id_priv->cm_id.ib); +	id = ib_create_cm_id(id_priv->id.device, cma_req_handler, id_priv); +	if (IS_ERR(id)) +		return PTR_ERR(id); -	addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; -	svc_id = cma_get_service_id(id_priv->id.ps, addr); -	if (cma_any_addr(addr)) +	id_priv->cm_id.ib = id; + +	addr = cma_src_addr(id_priv); +	svc_id = rdma_get_service_id(&id_priv->id, addr); +	if (cma_any_addr(addr) && !id_priv->afonly)  		ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);  	else {  		cma_set_compare_data(id_priv->id.ps, addr, &compare_data); @@ -1476,16 +1585,18 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)  static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)  {  	int ret; -	struct sockaddr_in *sin; +	struct iw_cm_id	*id; + +	id = iw_create_cm_id(id_priv->id.device, +			     iw_conn_req_handler, +			     id_priv); +	if (IS_ERR(id)) +		return PTR_ERR(id); -	id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, -					    iw_conn_req_handler, -					    id_priv); -	if (IS_ERR(id_priv->cm_id.iw)) -		return PTR_ERR(id_priv->cm_id.iw); +	id_priv->cm_id.iw = id; -	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; -	id_priv->cm_id.iw->local_addr = *sin; +	memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), +	       rdma_addr_size(cma_src_addr(id_priv)));  	ret = iw_cm_listen(id_priv->cm_id.iw, backlog); @@ -1514,20 +1625,26 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,  	struct rdma_cm_id *id;  	int ret; -	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps); +	if (cma_family(id_priv) == AF_IB && +	    rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) +		return; + +	id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, +			    id_priv->id.qp_type);  	if (IS_ERR(id))  		return;  	dev_id_priv = container_of(id, struct rdma_id_private, id); -	dev_id_priv->state = CMA_ADDR_BOUND; -	memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, -	       ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); +	dev_id_priv->state = RDMA_CM_ADDR_BOUND; +	memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), +	       rdma_addr_size(cma_src_addr(id_priv)));  	cma_attach_to_dev(dev_id_priv, cma_dev);  	list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);  	atomic_inc(&id_priv->refcount);  	dev_id_priv->internal_id = 1; +	dev_id_priv->afonly = id_priv->afonly;  	ret = rdma_listen(id, id_priv->backlog);  	if (ret) @@ -1546,50 +1663,6 @@ static void 
cma_listen_on_all(struct rdma_id_private *id_priv)  	mutex_unlock(&lock);  } -int rdma_listen(struct rdma_cm_id *id, int backlog) -{ -	struct rdma_id_private *id_priv; -	int ret; - -	id_priv = container_of(id, struct rdma_id_private, id); -	if (id_priv->state == CMA_IDLE) { -		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; -		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); -		if (ret) -			return ret; -	} - -	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN)) -		return -EINVAL; - -	id_priv->backlog = backlog; -	if (id->device) { -		switch (rdma_node_get_transport(id->device->node_type)) { -		case RDMA_TRANSPORT_IB: -			ret = cma_ib_listen(id_priv); -			if (ret) -				goto err; -			break; -		case RDMA_TRANSPORT_IWARP: -			ret = cma_iw_listen(id_priv, backlog); -			if (ret) -				goto err; -			break; -		default: -			ret = -ENOSYS; -			goto err; -		} -	} else -		cma_listen_on_all(id_priv); - -	return 0; -err: -	id_priv->backlog = 0; -	cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND); -	return ret; -} -EXPORT_SYMBOL(rdma_listen); -  void rdma_set_service_type(struct rdma_cm_id *id, int tos)  {  	struct rdma_id_private *id_priv; @@ -1611,8 +1684,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,  		route->num_paths = 1;  		*route->path_rec = *path_rec;  	} else { -		work->old_state = CMA_ROUTE_QUERY; -		work->new_state = CMA_ADDR_RESOLVED; +		work->old_state = RDMA_CM_ROUTE_QUERY; +		work->new_state = RDMA_CM_ADDR_RESOLVED;  		work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;  		work->event.status = status;  	} @@ -1623,31 +1696,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,  static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,  			      struct cma_work *work)  { -	struct rdma_addr *addr = &id_priv->id.route.addr; +	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;  	struct ib_sa_path_rec path_rec;  	ib_sa_comp_mask comp_mask;  	struct sockaddr_in6 *sin6; +	struct sockaddr_ib *sib;  	memset(&path_rec, 0, sizeof path_rec); -	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); -	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); -	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); +	rdma_addr_get_sgid(dev_addr, &path_rec.sgid); +	rdma_addr_get_dgid(dev_addr, &path_rec.dgid); +	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));  	path_rec.numb_path = 1;  	path_rec.reversible = 1; -	path_rec.service_id = cma_get_service_id(id_priv->id.ps, -							(struct sockaddr *) &addr->dst_addr); +	path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));  	comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |  		    IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |  		    IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; -	if (addr->src_addr.ss_family == AF_INET) { +	switch (cma_family(id_priv)) { +	case AF_INET:  		path_rec.qos_class = cpu_to_be16((u16) id_priv->tos);  		comp_mask |= IB_SA_PATH_REC_QOS_CLASS; -	} else { -		sin6 = (struct sockaddr_in6 *) &addr->src_addr; +		break; +	case AF_INET6: +		sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv);  		path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20);  		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; +		break; +	case AF_IB: +		sib = (struct sockaddr_ib *) cma_src_addr(id_priv); +		path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); +		comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; +		break;  	}  	id_priv->query_id = 
ib_sa_path_rec_get(&sa_client, id_priv->id.device, @@ -1670,7 +1751,7 @@ static void cma_work_handler(struct work_struct *_work)  		goto out;  	if (id_priv->id.event_handler(&id_priv->id, &work->event)) { -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		destroy = 1;  	}  out: @@ -1688,12 +1769,12 @@ static void cma_ndev_work_handler(struct work_struct *_work)  	int destroy = 0;  	mutex_lock(&id_priv->handler_mutex); -	if (id_priv->state == CMA_DESTROYING || -	    id_priv->state == CMA_DEVICE_REMOVAL) +	if (id_priv->state == RDMA_CM_DESTROYING || +	    id_priv->state == RDMA_CM_DEVICE_REMOVAL)  		goto out;  	if (id_priv->id.event_handler(&id_priv->id, &work->event)) { -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		destroy = 1;  	} @@ -1717,8 +1798,8 @@ static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)  	work->id = id_priv;  	INIT_WORK(&work->work, cma_work_handler); -	work->old_state = CMA_ROUTE_QUERY; -	work->new_state = CMA_ROUTE_RESOLVED; +	work->old_state = RDMA_CM_ROUTE_QUERY; +	work->new_state = RDMA_CM_ROUTE_RESOLVED;  	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;  	route->path_rec = kmalloc(sizeof *route->path_rec, GFP_KERNEL); @@ -1747,7 +1828,8 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED)) +	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, +			   RDMA_CM_ROUTE_RESOLVED))  		return -EINVAL;  	id->route.path_rec = kmemdup(path_rec, sizeof *path_rec * num_paths, @@ -1760,7 +1842,7 @@ int rdma_set_ib_paths(struct rdma_cm_id *id,  	id->route.num_paths = num_paths;  	return 0;  err: -	cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED); +	cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_ADDR_RESOLVED);  	return ret;  }  EXPORT_SYMBOL(rdma_set_ib_paths); @@ -1775,26 +1857,41 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)  	work->id = id_priv;  	INIT_WORK(&work->work, cma_work_handler); -	work->old_state = CMA_ROUTE_QUERY; -	work->new_state = CMA_ROUTE_RESOLVED; +	work->old_state = RDMA_CM_ROUTE_QUERY; +	work->new_state = RDMA_CM_ROUTE_RESOLVED;  	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;  	queue_work(cma_wq, &work->work);  	return 0;  } +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ +	int prio; +	struct net_device *dev; + +	prio = rt_tos2priority(tos); +	dev = ndev->priv_flags & IFF_802_1Q_VLAN ? 
+		vlan_dev_real_dev(ndev) : ndev; + +	if (dev->num_tc) +		return netdev_get_prio_tc_map(dev, prio); + +#if IS_ENABLED(CONFIG_VLAN_8021Q) +	if (ndev->priv_flags & IFF_802_1Q_VLAN) +		return (vlan_dev_get_egress_qos_mask(ndev, prio) & +			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +#endif +	return 0; +} +  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  {  	struct rdma_route *route = &id_priv->id.route;  	struct rdma_addr *addr = &route->addr;  	struct cma_work *work;  	int ret; -	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; -	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;  	struct net_device *ndev = NULL; -	u16 vid; -	if (src_addr->sin_family != dst_addr->sin_family) -		return -EINVAL;  	work = kzalloc(sizeof *work, GFP_KERNEL);  	if (!work) @@ -1818,17 +1915,20 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  		goto err2;  	} -	vid = rdma_vlan_dev_vlan_id(ndev); +	route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); +	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); +	memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); -	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); -	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &route->path_rec->sgid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, +		    &route->path_rec->dgid);  	route->path_rec->hop_limit = 1;  	route->path_rec->reversible = 1;  	route->path_rec->pkey = cpu_to_be16(0xffff);  	route->path_rec->mtu_selector = IB_SA_EQ; -	route->path_rec->sl = id_priv->tos >> 5; - +	route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);  	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);  	route->path_rec->rate_selector = IB_SA_EQ;  	route->path_rec->rate = iboe_get_rate(ndev); @@ -1840,8 +1940,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  		goto err2;  	} -	work->old_state = CMA_ROUTE_QUERY; -	work->new_state = CMA_ROUTE_RESOLVED; +	work->old_state = RDMA_CM_ROUTE_QUERY; +	work->new_state = RDMA_CM_ROUTE_RESOLVED;  	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;  	work->event.status = 0; @@ -1863,7 +1963,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY)) +	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, RDMA_CM_ROUTE_QUERY))  		return -EINVAL;  	atomic_inc(&id_priv->refcount); @@ -1892,34 +1992,63 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)  	return 0;  err: -	cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED); +	cma_comp_exch(id_priv, RDMA_CM_ROUTE_QUERY, RDMA_CM_ADDR_RESOLVED);  	cma_deref_id(id_priv);  	return ret;  }  EXPORT_SYMBOL(rdma_resolve_route); +static void cma_set_loopback(struct sockaddr *addr) +{ +	switch (addr->sa_family) { +	case AF_INET: +		((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); +		break; +	case AF_INET6: +		ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, +			      0, 0, 0, htonl(1)); +		break; +	default: +		ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, +			    0, 0, 0, htonl(1)); +		break; +	} +} +  static int cma_bind_loopback(struct rdma_id_private *id_priv)  { -	struct cma_device *cma_dev; +	struct cma_device *cma_dev, *cur_dev;  	struct ib_port_attr port_attr;  	union ib_gid gid;  	
u16 pkey;  	int ret;  	u8 p; +	cma_dev = NULL;  	mutex_lock(&lock); -	if (list_empty(&dev_list)) { +	list_for_each_entry(cur_dev, &dev_list, list) { +		if (cma_family(id_priv) == AF_IB && +		    rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) +			continue; + +		if (!cma_dev) +			cma_dev = cur_dev; + +		for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { +			if (!ib_query_port(cur_dev->device, p, &port_attr) && +			    port_attr.state == IB_PORT_ACTIVE) { +				cma_dev = cur_dev; +				goto port_found; +			} +		} +	} + +	if (!cma_dev) {  		ret = -ENODEV;  		goto out;  	} -	list_for_each_entry(cma_dev, &dev_list, list) -		for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) -			if (!ib_query_port(cma_dev->device, p, &port_attr) && -			    port_attr.state == IB_PORT_ACTIVE) -				goto port_found;  	p = 1; -	cma_dev = list_entry(dev_list.next, struct cma_device, list);  port_found:  	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); @@ -1938,6 +2067,7 @@ port_found:  	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);  	id_priv->id.port_num = p;  	cma_attach_to_dev(id_priv, cma_dev); +	cma_set_loopback(cma_src_addr(id_priv));  out:  	mutex_unlock(&lock);  	return ret; @@ -1951,34 +2081,25 @@ static void addr_handler(int status, struct sockaddr *src_addr,  	memset(&event, 0, sizeof event);  	mutex_lock(&id_priv->handler_mutex); - -	/* -	 * Grab mutex to block rdma_destroy_id() from removing the device while -	 * we're trying to acquire it. -	 */ -	mutex_lock(&lock); -	if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) { -		mutex_unlock(&lock); +	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, +			   RDMA_CM_ADDR_RESOLVED))  		goto out; -	} +	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));  	if (!status && !id_priv->cma_dev) -		status = cma_acquire_dev(id_priv); -	mutex_unlock(&lock); +		status = cma_acquire_dev(id_priv, NULL);  	if (status) { -		if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND)) +		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, +				   RDMA_CM_ADDR_BOUND))  			goto out;  		event.event = RDMA_CM_EVENT_ADDR_ERROR;  		event.status = status; -	} else { -		memcpy(&id_priv->id.route.addr.src_addr, src_addr, -		       ip_addr_size(src_addr)); +	} else  		event.event = RDMA_CM_EVENT_ADDR_RESOLVED; -	}  	if (id_priv->id.event_handler(&id_priv->id, &event)) { -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		mutex_unlock(&id_priv->handler_mutex);  		cma_deref_id(id_priv);  		rdma_destroy_id(&id_priv->id); @@ -1992,7 +2113,6 @@ out:  static int cma_resolve_loopback(struct rdma_id_private *id_priv)  {  	struct cma_work *work; -	struct sockaddr *src, *dst;  	union ib_gid gid;  	int ret; @@ -2009,22 +2129,40 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)  	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);  	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); -	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; -	if (cma_zero_addr(src)) { -		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; -		if ((src->sa_family = dst->sa_family) == AF_INET) { -			((struct sockaddr_in *) src)->sin_addr.s_addr = -				((struct sockaddr_in *) dst)->sin_addr.s_addr; -		} else { -			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, -				       &((struct sockaddr_in6 *) dst)->sin6_addr); -		} +	work->id = id_priv; +	INIT_WORK(&work->work, cma_work_handler); +	work->old_state = RDMA_CM_ADDR_QUERY; +	work->new_state = 
RDMA_CM_ADDR_RESOLVED; +	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; +	queue_work(cma_wq, &work->work); +	return 0; +err: +	kfree(work); +	return ret; +} + +static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) +{ +	struct cma_work *work; +	int ret; + +	work = kzalloc(sizeof *work, GFP_KERNEL); +	if (!work) +		return -ENOMEM; + +	if (!id_priv->cma_dev) { +		ret = cma_resolve_ib_dev(id_priv); +		if (ret) +			goto err;  	} +	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) +		&(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); +  	work->id = id_priv;  	INIT_WORK(&work->work, cma_work_handler); -	work->old_state = CMA_ADDR_QUERY; -	work->new_state = CMA_ADDR_RESOLVED; +	work->old_state = RDMA_CM_ADDR_QUERY; +	work->new_state = RDMA_CM_ADDR_RESOLVED;  	work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;  	queue_work(cma_wq, &work->work);  	return 0; @@ -2038,9 +2176,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,  {  	if (!src_addr || !src_addr->sa_family) {  		src_addr = (struct sockaddr *) &id->route.addr.src_addr; -		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { +		src_addr->sa_family = dst_addr->sa_family; +		if (dst_addr->sa_family == AF_INET6) {  			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =  				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; +		} else if (dst_addr->sa_family == AF_IB) { +			((struct sockaddr_ib *) src_addr)->sib_pkey = +				((struct sockaddr_ib *) dst_addr)->sib_pkey;  		}  	}  	return rdma_bind_addr(id, src_addr); @@ -2053,41 +2195,107 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (id_priv->state == CMA_IDLE) { +	if (id_priv->state == RDMA_CM_IDLE) {  		ret = cma_bind_addr(id, src_addr, dst_addr);  		if (ret)  			return ret;  	} -	if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY)) +	if (cma_family(id_priv) != dst_addr->sa_family) +		return -EINVAL; + +	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))  		return -EINVAL;  	atomic_inc(&id_priv->refcount); -	memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); -	if (cma_any_addr(dst_addr)) +	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); +	if (cma_any_addr(dst_addr)) {  		ret = cma_resolve_loopback(id_priv); -	else -		ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, -				      dst_addr, &id->route.addr.dev_addr, -				      timeout_ms, addr_handler, id_priv); +	} else { +		if (dst_addr->sa_family == AF_IB) { +			ret = cma_resolve_ib_addr(id_priv); +		} else { +			ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), +					      dst_addr, &id->route.addr.dev_addr, +					      timeout_ms, addr_handler, id_priv); +		} +	}  	if (ret)  		goto err;  	return 0;  err: -	cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND); +	cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND);  	cma_deref_id(id_priv);  	return ret;  }  EXPORT_SYMBOL(rdma_resolve_addr); +int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) +{ +	struct rdma_id_private *id_priv; +	unsigned long flags; +	int ret; + +	id_priv = container_of(id, struct rdma_id_private, id); +	spin_lock_irqsave(&id_priv->lock, flags); +	if (reuse || id_priv->state == RDMA_CM_IDLE) { +		id_priv->reuseaddr = reuse; +		ret = 0; +	} else { +		ret = -EINVAL; +	} +	spin_unlock_irqrestore(&id_priv->lock, flags); +	return ret; +} 
+EXPORT_SYMBOL(rdma_set_reuseaddr); + +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ +	struct rdma_id_private *id_priv; +	unsigned long flags; +	int ret; + +	id_priv = container_of(id, struct rdma_id_private, id); +	spin_lock_irqsave(&id_priv->lock, flags); +	if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { +		id_priv->options |= (1 << CMA_OPTION_AFONLY); +		id_priv->afonly = afonly; +		ret = 0; +	} else { +		ret = -EINVAL; +	} +	spin_unlock_irqrestore(&id_priv->lock, flags); +	return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); +  static void cma_bind_port(struct rdma_bind_list *bind_list,  			  struct rdma_id_private *id_priv)  { -	struct sockaddr_in *sin; +	struct sockaddr *addr; +	struct sockaddr_ib *sib; +	u64 sid, mask; +	__be16 port; + +	addr = cma_src_addr(id_priv); +	port = htons(bind_list->port); -	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; -	sin->sin_port = htons(bind_list->port); +	switch (addr->sa_family) { +	case AF_INET: +		((struct sockaddr_in *) addr)->sin_port = port; +		break; +	case AF_INET6: +		((struct sockaddr_in6 *) addr)->sin6_port = port; +		break; +	case AF_IB: +		sib = (struct sockaddr_ib *) addr; +		sid = be64_to_cpu(sib->sib_sid); +		mask = be64_to_cpu(sib->sib_sid_mask); +		sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); +		sib->sib_sid_mask = cpu_to_be64(~0ULL); +		break; +	}  	id_priv->bind_list = bind_list;  	hlist_add_head(&id_priv->node, &bind_list->owners);  } @@ -2096,33 +2304,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv,  			  unsigned short snum)  {  	struct rdma_bind_list *bind_list; -	int port, ret; +	int ret;  	bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL);  	if (!bind_list)  		return -ENOMEM; -	do { -		ret = idr_get_new_above(ps, bind_list, snum, &port); -	} while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); - -	if (ret) -		goto err1; - -	if (port != snum) { -		ret = -EADDRNOTAVAIL; -		goto err2; -	} +	ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL); +	if (ret < 0) +		goto err;  	bind_list->ps = ps; -	bind_list->port = (unsigned short) port; +	bind_list->port = (unsigned short)ret;  	cma_bind_port(bind_list, id_priv);  	return 0; -err2: -	idr_remove(ps, port); -err1: +err:  	kfree(bind_list); -	return ret; +	return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;  }  static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) @@ -2131,9 +2329,9 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)  	int low, high, remaining;  	unsigned int rover; -	inet_get_local_port_range(&low, &high); +	inet_get_local_port_range(&init_net, &low, &high);  	remaining = (high - low) + 1; -	rover = net_random() % remaining + low; +	rover = prandom_u32() % remaining + low;  retry:  	if (last_used_port != rover &&  	    !idr_find(ps, (unsigned short) rover)) { @@ -2156,67 +2354,135 @@ retry:  	return -EADDRNOTAVAIL;  } -static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) +/* + * Check that the requested port is available.  This is called when trying to + * bind to a specific port, or when trying to listen on a bound port.  In + * the latter case, the provided id_priv may already be on the bind_list, but + * we still need to check that it's okay to start listening. 
+ */ +static int cma_check_port(struct rdma_bind_list *bind_list, +			  struct rdma_id_private *id_priv, uint8_t reuseaddr)  {  	struct rdma_id_private *cur_id; -	struct sockaddr_in *sin, *cur_sin; -	struct rdma_bind_list *bind_list; -	struct hlist_node *node; -	unsigned short snum; +	struct sockaddr *addr, *cur_addr; -	sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; -	snum = ntohs(sin->sin_port); -	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) -		return -EACCES; +	addr = cma_src_addr(id_priv); +	hlist_for_each_entry(cur_id, &bind_list->owners, node) { +		if (id_priv == cur_id) +			continue; -	bind_list = idr_find(ps, snum); -	if (!bind_list) -		return cma_alloc_port(ps, id_priv, snum); +		if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && +		    cur_id->reuseaddr) +			continue; -	/* -	 * We don't support binding to any address if anyone is bound to -	 * a specific address on the same port. -	 */ -	if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) -		return -EADDRNOTAVAIL; +		cur_addr = cma_src_addr(cur_id); +		if (id_priv->afonly && cur_id->afonly && +		    (addr->sa_family != cur_addr->sa_family)) +			continue; -	hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { -		if (cma_any_addr((struct sockaddr *) &cur_id->id.route.addr.src_addr)) +		if (cma_any_addr(addr) || cma_any_addr(cur_addr))  			return -EADDRNOTAVAIL; -		cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr; -		if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr) +		if (!cma_addr_cmp(addr, cur_addr))  			return -EADDRINUSE;  	} - -	cma_bind_port(bind_list, id_priv);  	return 0;  } -static int cma_get_port(struct rdma_id_private *id_priv) +static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)  { -	struct idr *ps; +	struct rdma_bind_list *bind_list; +	unsigned short snum;  	int ret; +	snum = ntohs(cma_port(cma_src_addr(id_priv))); +	if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) +		return -EACCES; + +	bind_list = idr_find(ps, snum); +	if (!bind_list) { +		ret = cma_alloc_port(ps, id_priv, snum); +	} else { +		ret = cma_check_port(bind_list, id_priv, id_priv->reuseaddr); +		if (!ret) +			cma_bind_port(bind_list, id_priv); +	} +	return ret; +} + +static int cma_bind_listen(struct rdma_id_private *id_priv) +{ +	struct rdma_bind_list *bind_list = id_priv->bind_list; +	int ret = 0; + +	mutex_lock(&lock); +	if (bind_list->owners.first->next) +		ret = cma_check_port(bind_list, id_priv, 0); +	mutex_unlock(&lock); +	return ret; +} + +static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv) +{  	switch (id_priv->id.ps) { -	case RDMA_PS_SDP: -		ps = &sdp_ps; -		break;  	case RDMA_PS_TCP: -		ps = &tcp_ps; -		break; +		return &tcp_ps;  	case RDMA_PS_UDP: -		ps = &udp_ps; -		break; +		return &udp_ps;  	case RDMA_PS_IPOIB: -		ps = &ipoib_ps; -		break; +		return &ipoib_ps; +	case RDMA_PS_IB: +		return &ib_ps;  	default: -		return -EPROTONOSUPPORT; +		return NULL;  	} +} + +static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) +{ +	struct idr *ps = NULL; +	struct sockaddr_ib *sib; +	u64 sid_ps, mask, sid; + +	sib = (struct sockaddr_ib *) cma_src_addr(id_priv); +	mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; +	sid = be64_to_cpu(sib->sib_sid) & mask; + +	if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { +		sid_ps = RDMA_IB_IP_PS_IB; +		ps = &ib_ps; +	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && +		   (sid == (RDMA_IB_IP_PS_TCP & mask))) { +		
sid_ps = RDMA_IB_IP_PS_TCP; +		ps = &tcp_ps; +	} else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && +		   (sid == (RDMA_IB_IP_PS_UDP & mask))) { +		sid_ps = RDMA_IB_IP_PS_UDP; +		ps = &udp_ps; +	} + +	if (ps) { +		sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); +		sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | +						be64_to_cpu(sib->sib_sid_mask)); +	} +	return ps; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ +	struct idr *ps; +	int ret; + +	if (cma_family(id_priv) != AF_IB) +		ps = cma_select_inet_ps(id_priv); +	else +		ps = cma_select_ib_ps(id_priv); +	if (!ps) +		return -EPROTONOSUPPORT;  	mutex_lock(&lock); -	if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) +	if (cma_any_port(cma_src_addr(id_priv)))  		ret = cma_alloc_any_port(ps, id_priv);  	else  		ret = cma_use_port(ps, id_priv); @@ -2228,15 +2494,18 @@ static int cma_get_port(struct rdma_id_private *id_priv)  static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,  			       struct sockaddr *addr)  { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6)  	struct sockaddr_in6 *sin6;  	if (addr->sa_family != AF_INET6)  		return 0;  	sin6 = (struct sockaddr_in6 *) addr; -	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && -	    !sin6->sin6_scope_id) + +	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) +		return 0; + +	if (!sin6->sin6_scope_id)  			return -EINVAL;  	dev_addr->bound_dev_if = sin6->sin6_scope_id; @@ -2244,108 +2513,132 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,  	return 0;  } +int rdma_listen(struct rdma_cm_id *id, int backlog) +{ +	struct rdma_id_private *id_priv; +	int ret; + +	id_priv = container_of(id, struct rdma_id_private, id); +	if (id_priv->state == RDMA_CM_IDLE) { +		id->route.addr.src_addr.ss_family = AF_INET; +		ret = rdma_bind_addr(id, cma_src_addr(id_priv)); +		if (ret) +			return ret; +	} + +	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) +		return -EINVAL; + +	if (id_priv->reuseaddr) { +		ret = cma_bind_listen(id_priv); +		if (ret) +			goto err; +	} + +	id_priv->backlog = backlog; +	if (id->device) { +		switch (rdma_node_get_transport(id->device->node_type)) { +		case RDMA_TRANSPORT_IB: +			ret = cma_ib_listen(id_priv); +			if (ret) +				goto err; +			break; +		case RDMA_TRANSPORT_IWARP: +			ret = cma_iw_listen(id_priv, backlog); +			if (ret) +				goto err; +			break; +		default: +			ret = -ENOSYS; +			goto err; +		} +	} else +		cma_listen_on_all(id_priv); + +	return 0; +err: +	id_priv->backlog = 0; +	cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); +	return ret; +} +EXPORT_SYMBOL(rdma_listen); +  int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)  {  	struct rdma_id_private *id_priv;  	int ret; -	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) +	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && +	    addr->sa_family != AF_IB)  		return -EAFNOSUPPORT;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND)) +	if (!cma_comp_exch(id_priv, RDMA_CM_IDLE, RDMA_CM_ADDR_BOUND))  		return -EINVAL;  	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);  	if (ret)  		goto err1; +	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));  	if (!cma_any_addr(addr)) { -		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); +		ret = cma_translate_addr(addr, &id->route.addr.dev_addr); 
 		if (ret)  			goto err1; -		mutex_lock(&lock); -		ret = cma_acquire_dev(id_priv); -		mutex_unlock(&lock); +		ret = cma_acquire_dev(id_priv, NULL);  		if (ret)  			goto err1;  	} -	memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); +	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { +		if (addr->sa_family == AF_INET) +			id_priv->afonly = 1; +#if IS_ENABLED(CONFIG_IPV6) +		else if (addr->sa_family == AF_INET6) +			id_priv->afonly = init_net.ipv6.sysctl.bindv6only; +#endif +	}  	ret = cma_get_port(id_priv);  	if (ret)  		goto err2;  	return 0;  err2: -	if (id_priv->cma_dev) { -		mutex_lock(&lock); -		cma_detach_from_dev(id_priv); -		mutex_unlock(&lock); -	} +	if (id_priv->cma_dev) +		cma_release_dev(id_priv);  err1: -	cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE); +	cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);  	return ret;  }  EXPORT_SYMBOL(rdma_bind_addr); -static int cma_format_hdr(void *hdr, enum rdma_port_space ps, -			  struct rdma_route *route) +static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)  {  	struct cma_hdr *cma_hdr; -	struct sdp_hh *sdp_hdr; -	if (route->addr.src_addr.ss_family == AF_INET) { +	cma_hdr = hdr; +	cma_hdr->cma_version = CMA_VERSION; +	if (cma_family(id_priv) == AF_INET) {  		struct sockaddr_in *src4, *dst4; -		src4 = (struct sockaddr_in *) &route->addr.src_addr; -		dst4 = (struct sockaddr_in *) &route->addr.dst_addr; - -		switch (ps) { -		case RDMA_PS_SDP: -			sdp_hdr = hdr; -			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) -				return -EINVAL; -			sdp_set_ip_ver(sdp_hdr, 4); -			sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; -			sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; -			sdp_hdr->port = src4->sin_port; -			break; -		default: -			cma_hdr = hdr; -			cma_hdr->cma_version = CMA_VERSION; -			cma_set_ip_ver(cma_hdr, 4); -			cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; -			cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; -			cma_hdr->port = src4->sin_port; -			break; -		} -	} else { +		src4 = (struct sockaddr_in *) cma_src_addr(id_priv); +		dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); + +		cma_set_ip_ver(cma_hdr, 4); +		cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; +		cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; +		cma_hdr->port = src4->sin_port; +	} else if (cma_family(id_priv) == AF_INET6) {  		struct sockaddr_in6 *src6, *dst6; -		src6 = (struct sockaddr_in6 *) &route->addr.src_addr; -		dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; - -		switch (ps) { -		case RDMA_PS_SDP: -			sdp_hdr = hdr; -			if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) -				return -EINVAL; -			sdp_set_ip_ver(sdp_hdr, 6); -			sdp_hdr->src_addr.ip6 = src6->sin6_addr; -			sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; -			sdp_hdr->port = src6->sin6_port; -			break; -		default: -			cma_hdr = hdr; -			cma_hdr->cma_version = CMA_VERSION; -			cma_set_ip_ver(cma_hdr, 6); -			cma_hdr->src_addr.ip6 = src6->sin6_addr; -			cma_hdr->dst_addr.ip6 = dst6->sin6_addr; -			cma_hdr->port = src6->sin6_port; -			break; -		} +		src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); +		dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); + +		cma_set_ip_ver(cma_hdr, 6); +		cma_hdr->src_addr.ip6 = src6->sin6_addr; +		cma_hdr->dst_addr.ip6 = dst6->sin6_addr; +		cma_hdr->port = src6->sin6_port;  	}  	return 0;  } @@ -2358,7 +2651,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,  	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;  	int ret = 0; -	if 
(cma_disable_callback(id_priv, CMA_CONNECT)) +	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))  		return 0;  	memset(&event, 0, sizeof event); @@ -2375,15 +2668,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,  			event.status = ib_event->param.sidr_rep_rcvd.status;  			break;  		} -		ret = cma_set_qkey(id_priv); +		ret = cma_set_qkey(id_priv, rep->qkey);  		if (ret) {  			event.event = RDMA_CM_EVENT_ADDR_ERROR; -			event.status = -EINVAL; -			break; -		} -		if (id_priv->qkey != rep->qkey) { -			event.event = RDMA_CM_EVENT_UNREACHABLE; -			event.status = -EINVAL; +			event.status = ret;  			break;  		}  		ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, @@ -2404,7 +2692,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,  	if (ret) {  		/* Destroy the CM ID by returning a non-zero value. */  		id_priv->cm_id.ib = NULL; -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		mutex_unlock(&id_priv->handler_mutex);  		rdma_destroy_id(&id_priv->id);  		return ret; @@ -2418,34 +2706,45 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,  			      struct rdma_conn_param *conn_param)  {  	struct ib_cm_sidr_req_param req; -	struct rdma_route *route; -	int ret; +	struct ib_cm_id	*id; +	void *private_data; +	int offset, ret; -	req.private_data_len = sizeof(struct cma_hdr) + -			       conn_param->private_data_len; -	req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); -	if (!req.private_data) -		return -ENOMEM; +	memset(&req, 0, sizeof req); +	offset = cma_user_data_offset(id_priv); +	req.private_data_len = offset + conn_param->private_data_len; +	if (req.private_data_len < conn_param->private_data_len) +		return -EINVAL; + +	if (req.private_data_len) { +		private_data = kzalloc(req.private_data_len, GFP_ATOMIC); +		if (!private_data) +			return -ENOMEM; +	} else { +		private_data = NULL; +	}  	if (conn_param->private_data && conn_param->private_data_len) -		memcpy((void *) req.private_data + sizeof(struct cma_hdr), -		       conn_param->private_data, conn_param->private_data_len); +		memcpy(private_data + offset, conn_param->private_data, +		       conn_param->private_data_len); -	route = &id_priv->id.route; -	ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); -	if (ret) -		goto out; +	if (private_data) { +		ret = cma_format_hdr(private_data, id_priv); +		if (ret) +			goto out; +		req.private_data = private_data; +	} -	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, -					    cma_sidr_rep_handler, id_priv); -	if (IS_ERR(id_priv->cm_id.ib)) { -		ret = PTR_ERR(id_priv->cm_id.ib); +	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, +			     id_priv); +	if (IS_ERR(id)) { +		ret = PTR_ERR(id);  		goto out;  	} +	id_priv->cm_id.ib = id; -	req.path = route->path_rec; -	req.service_id = cma_get_service_id(id_priv->id.ps, -					    (struct sockaddr *) &route->addr.dst_addr); +	req.path = id_priv->id.route.path_rec; +	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));  	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);  	req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2455,7 +2754,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,  		id_priv->cm_id.ib = NULL;  	}  out: -	kfree(req.private_data); +	kfree(private_data);  	return ret;  } @@ -2465,46 +2764,55 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,  	struct ib_cm_req_param req;  	struct rdma_route *route;  	void *private_data; +	struct ib_cm_id	*id;  	int offset, ret;  	
memset(&req, 0, sizeof req); -	offset = cma_user_data_offset(id_priv->id.ps); +	offset = cma_user_data_offset(id_priv);  	req.private_data_len = offset + conn_param->private_data_len; -	private_data = kzalloc(req.private_data_len, GFP_ATOMIC); -	if (!private_data) -		return -ENOMEM; +	if (req.private_data_len < conn_param->private_data_len) +		return -EINVAL; + +	if (req.private_data_len) { +		private_data = kzalloc(req.private_data_len, GFP_ATOMIC); +		if (!private_data) +			return -ENOMEM; +	} else { +		private_data = NULL; +	}  	if (conn_param->private_data && conn_param->private_data_len)  		memcpy(private_data + offset, conn_param->private_data,  		       conn_param->private_data_len); -	id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler, -					    id_priv); -	if (IS_ERR(id_priv->cm_id.ib)) { -		ret = PTR_ERR(id_priv->cm_id.ib); +	id = ib_create_cm_id(id_priv->id.device, cma_ib_handler, id_priv); +	if (IS_ERR(id)) { +		ret = PTR_ERR(id);  		goto out;  	} +	id_priv->cm_id.ib = id;  	route = &id_priv->id.route; -	ret = cma_format_hdr(private_data, id_priv->id.ps, route); -	if (ret) -		goto out; -	req.private_data = private_data; +	if (private_data) { +		ret = cma_format_hdr(private_data, id_priv); +		if (ret) +			goto out; +		req.private_data = private_data; +	}  	req.primary_path = &route->path_rec[0];  	if (route->num_paths == 2)  		req.alternate_path = &route->path_rec[1]; -	req.service_id = cma_get_service_id(id_priv->id.ps, -					    (struct sockaddr *) &route->addr.dst_addr); +	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));  	req.qp_num = id_priv->qp_num; -	req.qp_type = IB_QPT_RC; +	req.qp_type = id_priv->id.qp_type;  	req.starting_psn = id_priv->seq_num;  	req.responder_resources = conn_param->responder_resources;  	req.initiator_depth = conn_param->initiator_depth;  	req.flow_control = conn_param->flow_control; -	req.retry_count = conn_param->retry_count; -	req.rnr_retry_count = conn_param->rnr_retry_count; +	req.retry_count = min_t(u8, 7, conn_param->retry_count); +	req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);  	req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;  	req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;  	req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2512,8 +2820,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,  	ret = ib_send_cm_req(id_priv->cm_id.ib, &req);  out: -	if (ret && !IS_ERR(id_priv->cm_id.ib)) { -		ib_destroy_cm_id(id_priv->cm_id.ib); +	if (ret && !IS_ERR(id)) { +		ib_destroy_cm_id(id);  		id_priv->cm_id.ib = NULL;  	} @@ -2525,39 +2833,37 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,  			  struct rdma_conn_param *conn_param)  {  	struct iw_cm_id *cm_id; -	struct sockaddr_in* sin;  	int ret;  	struct iw_cm_conn_param iw_param;  	cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv); -	if (IS_ERR(cm_id)) { -		ret = PTR_ERR(cm_id); -		goto out; -	} +	if (IS_ERR(cm_id)) +		return PTR_ERR(cm_id);  	id_priv->cm_id.iw = cm_id; -	sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; -	cm_id->local_addr = *sin; - -	sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; -	cm_id->remote_addr = *sin; +	memcpy(&cm_id->local_addr, cma_src_addr(id_priv), +	       rdma_addr_size(cma_src_addr(id_priv))); +	memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), +	       rdma_addr_size(cma_dst_addr(id_priv)));  	ret = cma_modify_qp_rtr(id_priv, conn_param);  	if (ret)  		goto out; -	iw_param.ord = conn_param->initiator_depth; -	
iw_param.ird = conn_param->responder_resources; -	iw_param.private_data = conn_param->private_data; -	iw_param.private_data_len = conn_param->private_data_len; -	if (id_priv->id.qp) +	if (conn_param) { +		iw_param.ord = conn_param->initiator_depth; +		iw_param.ird = conn_param->responder_resources; +		iw_param.private_data = conn_param->private_data; +		iw_param.private_data_len = conn_param->private_data_len; +		iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num; +	} else { +		memset(&iw_param, 0, sizeof iw_param);  		iw_param.qpn = id_priv->qp_num; -	else -		iw_param.qpn = conn_param->qp_num; +	}  	ret = iw_cm_connect(cm_id, &iw_param);  out: -	if (ret && !IS_ERR(cm_id)) { +	if (ret) {  		iw_destroy_cm_id(cm_id);  		id_priv->cm_id.iw = NULL;  	} @@ -2570,7 +2876,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT)) +	if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT))  		return -EINVAL;  	if (!id->qp) { @@ -2580,7 +2886,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)  	switch (rdma_node_get_transport(id->device->node_type)) {  	case RDMA_TRANSPORT_IB: -		if (cma_is_ud_ps(id->ps)) +		if (id->qp_type == IB_QPT_UD)  			ret = cma_resolve_ib_udp(id_priv, conn_param);  		else  			ret = cma_connect_ib(id_priv, conn_param); @@ -2597,7 +2903,7 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)  	return 0;  err: -	cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED); +	cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED);  	return ret;  }  EXPORT_SYMBOL(rdma_connect); @@ -2625,7 +2931,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,  	rep.initiator_depth = conn_param->initiator_depth;  	rep.failover_accepted = 0;  	rep.flow_control = conn_param->flow_control; -	rep.rnr_retry_count = conn_param->rnr_retry_count; +	rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count);  	rep.srq = id_priv->srq ? 
1 : 0;  	ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); @@ -2656,7 +2962,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,  }  static int cma_send_sidr_rep(struct rdma_id_private *id_priv, -			     enum ib_cm_sidr_status status, +			     enum ib_cm_sidr_status status, u32 qkey,  			     const void *private_data, int private_data_len)  {  	struct ib_cm_sidr_rep_param rep; @@ -2665,7 +2971,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,  	memset(&rep, 0, sizeof rep);  	rep.status = status;  	if (status == IB_SIDR_SUCCESS) { -		ret = cma_set_qkey(id_priv); +		ret = cma_set_qkey(id_priv, qkey);  		if (ret)  			return ret;  		rep.qp_num = id_priv->qp_num; @@ -2683,7 +2989,10 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp(id_priv, CMA_CONNECT)) + +	id_priv->owner = task_pid_nr(current); + +	if (!cma_comp(id_priv, RDMA_CM_CONNECT))  		return -EINVAL;  	if (!id->qp && conn_param) { @@ -2693,14 +3002,21 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)  	switch (rdma_node_get_transport(id->device->node_type)) {  	case RDMA_TRANSPORT_IB: -		if (cma_is_ud_ps(id->ps)) -			ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, -						conn_param->private_data, -						conn_param->private_data_len); -		else if (conn_param) -			ret = cma_accept_ib(id_priv, conn_param); -		else -			ret = cma_rep_recv(id_priv); +		if (id->qp_type == IB_QPT_UD) { +			if (conn_param) +				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, +							conn_param->qkey, +							conn_param->private_data, +							conn_param->private_data_len); +			else +				ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, +							0, NULL, 0); +		} else { +			if (conn_param) +				ret = cma_accept_ib(id_priv, conn_param); +			else +				ret = cma_rep_recv(id_priv); +		}  		break;  	case RDMA_TRANSPORT_IWARP:  		ret = cma_accept_iw(id_priv, conn_param); @@ -2727,7 +3043,7 @@ int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_has_cm_dev(id_priv)) +	if (!id_priv->cm_id.ib)  		return -EINVAL;  	switch (id->device->node_type) { @@ -2749,13 +3065,13 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_has_cm_dev(id_priv)) +	if (!id_priv->cm_id.ib)  		return -EINVAL;  	switch (rdma_node_get_transport(id->device->node_type)) {  	case RDMA_TRANSPORT_IB: -		if (cma_is_ud_ps(id->ps)) -			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, +		if (id->qp_type == IB_QPT_UD) +			ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0,  						private_data, private_data_len);  		else  			ret = ib_send_cm_rej(id_priv->cm_id.ib, @@ -2780,7 +3096,7 @@ int rdma_disconnect(struct rdma_cm_id *id)  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_has_cm_dev(id_priv)) +	if (!id_priv->cm_id.ib)  		return -EINVAL;  	switch (rdma_node_get_transport(id->device->node_type)) { @@ -2812,14 +3128,16 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)  	int ret;  	id_priv = mc->id_priv; -	if (cma_disable_callback(id_priv, CMA_ADDR_BOUND) && -	    cma_disable_callback(id_priv, CMA_ADDR_RESOLVED)) +	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && +	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))  		return 0; +	if (!status) +		status = cma_set_qkey(id_priv, 
be32_to_cpu(multicast->rec.qkey));  	mutex_lock(&id_priv->qp_mutex);  	if (!status && id_priv->id.qp)  		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, -					 multicast->rec.mlid); +					 be16_to_cpu(multicast->rec.mlid));  	mutex_unlock(&id_priv->qp_mutex);  	memset(&event, 0, sizeof event); @@ -2837,7 +3155,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)  	ret = id_priv->id.event_handler(&id_priv->id, &event);  	if (ret) { -		cma_exch(id_priv, CMA_DESTROYING); +		cma_exch(id_priv, RDMA_CM_DESTROYING);  		mutex_unlock(&id_priv->handler_mutex);  		rdma_destroy_id(&id_priv->id);  		return 0; @@ -2862,6 +3180,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,  								 0xFF10A01B)) {  		/* IPv6 address is an SA assigned MGID. */  		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); +	} else if (addr->sa_family == AF_IB) { +		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);  	} else if ((addr->sa_family == AF_INET6)) {  		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);  		if (id_priv->id.ps == RDMA_PS_UDP) @@ -2889,9 +3209,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,  	if (ret)  		return ret; +	ret = cma_set_qkey(id_priv, 0); +	if (ret) +		return ret; +  	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); -	if (id_priv->id.ps == RDMA_PS_UDP) -		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); +	rec.qkey = cpu_to_be32(id_priv->qkey);  	rdma_addr_get_sgid(dev_addr, &rec.port_gid);  	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));  	rec.join_state = 1; @@ -2904,16 +3227,16 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,  	if (id_priv->id.ps == RDMA_PS_IPOIB)  		comp_mask |= IB_SA_MCMEMBER_REC_RATE | -			     IB_SA_MCMEMBER_REC_RATE_SELECTOR; +			     IB_SA_MCMEMBER_REC_RATE_SELECTOR | +			     IB_SA_MCMEMBER_REC_MTU_SELECTOR | +			     IB_SA_MCMEMBER_REC_MTU | +			     IB_SA_MCMEMBER_REC_HOP_LIMIT;  	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,  						id_priv->id.port_num, &rec,  						comp_mask, GFP_KERNEL,  						cma_ib_mc_handler, mc); -	if (IS_ERR(mc->multicast.ib)) -		return PTR_ERR(mc->multicast.ib); - -	return 0; +	return PTR_ERR_OR_ZERO(mc->multicast.ib);  }  static void iboe_mcast_work_handler(struct work_struct *work) @@ -2996,7 +3319,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,  		err = -EINVAL;  		goto out2;  	} -	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &mc->multicast.ib->rec.port_gid);  	work->id = id_priv;  	work->mc = mc;  	INIT_WORK(&work->work, iboe_mcast_work_handler); @@ -3020,15 +3344,15 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,  	int ret;  	id_priv = container_of(id, struct rdma_id_private, id); -	if (!cma_comp(id_priv, CMA_ADDR_BOUND) && -	    !cma_comp(id_priv, CMA_ADDR_RESOLVED)) +	if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && +	    !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED))  		return -EINVAL;  	mc = kmalloc(sizeof *mc, GFP_KERNEL);  	if (!mc)  		return -ENOMEM; -	memcpy(&mc->addr, addr, ip_addr_size(addr)); +	memcpy(&mc->addr, addr, rdma_addr_size(addr));  	mc->context = context;  	mc->id_priv = id_priv; @@ -3073,14 +3397,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)  	id_priv = container_of(id, struct rdma_id_private, id);  	spin_lock_irq(&id_priv->lock);  	list_for_each_entry(mc, &id_priv->mc_list, list) { -		if 
(!memcmp(&mc->addr, addr, ip_addr_size(addr))) { +		if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) {  			list_del(&mc->list);  			spin_unlock_irq(&id_priv->lock);  			if (id->qp)  				ib_detach_mcast(id->qp,  						&mc->multicast.ib->rec.mgid, -						mc->multicast.ib->rec.mlid); +						be16_to_cpu(mc->multicast.ib->rec.mlid));  			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {  				switch (rdma_port_get_link_layer(id->device, id->port_num)) {  				case IB_LINK_LAYER_INFINIBAND: @@ -3127,9 +3451,9 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id  }  static int cma_netdev_callback(struct notifier_block *self, unsigned long event, -			       void *ctx) +			       void *ptr)  { -	struct net_device *ndev = (struct net_device *)ctx; +	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);  	struct cma_device *cma_dev;  	struct rdma_id_private *id_priv;  	int ret = NOTIFY_DONE; @@ -3186,19 +3510,19 @@ static void cma_add_one(struct ib_device *device)  static int cma_remove_id_dev(struct rdma_id_private *id_priv)  {  	struct rdma_cm_event event; -	enum cma_state state; +	enum rdma_cm_state state;  	int ret = 0;  	/* Record that we want to remove the device */ -	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL); -	if (state == CMA_DESTROYING) +	state = cma_exch(id_priv, RDMA_CM_DEVICE_REMOVAL); +	if (state == RDMA_CM_DESTROYING)  		return 0;  	cma_cancel_operation(id_priv, state);  	mutex_lock(&id_priv->handler_mutex);  	/* Check for destruction from another callback. */ -	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL)) +	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))  		goto out;  	memset(&event, 0, sizeof event); @@ -3253,6 +3577,85 @@ static void cma_remove_one(struct ib_device *device)  	kfree(cma_dev);  } +static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nlmsghdr *nlh; +	struct rdma_cm_id_stats *id_stats; +	struct rdma_id_private *id_priv; +	struct rdma_cm_id *id = NULL; +	struct cma_device *cma_dev; +	int i_dev = 0, i_id = 0; + +	/* +	 * We export all of the IDs as a sequence of messages.  Each +	 * ID gets its own netlink message. 
+	 */ +	mutex_lock(&lock); + +	list_for_each_entry(cma_dev, &dev_list, list) { +		if (i_dev < cb->args[0]) { +			i_dev++; +			continue; +		} + +		i_id = 0; +		list_for_each_entry(id_priv, &cma_dev->id_list, list) { +			if (i_id < cb->args[1]) { +				i_id++; +				continue; +			} + +			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq, +						sizeof *id_stats, RDMA_NL_RDMA_CM, +						RDMA_NL_RDMA_CM_ID_STATS, +						NLM_F_MULTI); +			if (!id_stats) +				goto out; + +			memset(id_stats, 0, sizeof *id_stats); +			id = &id_priv->id; +			id_stats->node_type = id->route.addr.dev_addr.dev_type; +			id_stats->port_num = id->port_num; +			id_stats->bound_dev_if = +				id->route.addr.dev_addr.bound_dev_if; + +			if (ibnl_put_attr(skb, nlh, +					  rdma_addr_size(cma_src_addr(id_priv)), +					  cma_src_addr(id_priv), +					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) +				goto out; +			if (ibnl_put_attr(skb, nlh, +					  rdma_addr_size(cma_src_addr(id_priv)), +					  cma_dst_addr(id_priv), +					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) +				goto out; + +			id_stats->pid		= id_priv->owner; +			id_stats->port_space	= id->ps; +			id_stats->cm_state	= id_priv->state; +			id_stats->qp_num	= id_priv->qp_num; +			id_stats->qp_type	= id->qp_type; + +			i_id++; +		} + +		cb->args[1] = 0; +		i_dev++; +	} + +out: +	mutex_unlock(&lock); +	cb->args[0] = i_dev; +	cb->args[1] = i_id; + +	return skb->len; +} + +static const struct ibnl_client_cbs cma_cb_table[] = { +	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats, +				       .module = THIS_MODULE }, +}; +  static int __init cma_init(void)  {  	int ret; @@ -3268,6 +3671,10 @@ static int __init cma_init(void)  	ret = ib_register_client(&cma_client);  	if (ret)  		goto err; + +	if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table)) +		printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n"); +  	return 0;  err: @@ -3280,15 +3687,16 @@ err:  static void __exit cma_cleanup(void)  { +	ibnl_remove_client(RDMA_NL_RDMA_CM);  	ib_unregister_client(&cma_client);  	unregister_netdevice_notifier(&cma_nb);  	rdma_addr_unregister_client(&addr_client);  	ib_sa_unregister_client(&sa_client);  	destroy_workqueue(cma_wq); -	idr_destroy(&sdp_ps);  	idr_destroy(&tcp_ps);  	idr_destroy(&udp_ps);  	idr_destroy(&ipoib_ps); +	idr_destroy(&ib_ps);  }  module_init(cma_init); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a565af5c2d2..87d1936f5c1 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);  int  ib_cache_setup(void);  void ib_cache_cleanup(void); +int ib_resolve_eth_l2_attrs(struct ib_qp *qp, +			    struct ib_qp_attr *qp_attr, int *qp_attr_mask);  #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index a19effad081..18c1ece765f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -38,7 +38,7 @@  #include <linux/slab.h>  #include <linux/init.h>  #include <linux/mutex.h> -#include <linux/workqueue.h> +#include <rdma/rdma_netlink.h>  #include "core_priv.h" @@ -52,6 +52,9 @@ struct ib_client_data {  	void *            data;  }; +struct workqueue_struct *ib_wq; +EXPORT_SYMBOL_GPL(ib_wq); +  static LIST_HEAD(device_list);  static LIST_HEAD(client_list); @@ -624,6 +627,9 @@ int ib_modify_device(struct ib_device *device,  		     int device_modify_mask,  		     struct ib_device_modify *device_modify)  { +	if (!device->modify_device) +		return 
-ENOSYS; +  	return device->modify_device(device, device_modify_mask,  				     device_modify);  } @@ -644,6 +650,9 @@ int ib_modify_port(struct ib_device *device,  		   u8 port_num, int port_modify_mask,  		   struct ib_port_modify *port_modify)  { +	if (!device->modify_port) +		return -ENOSYS; +  	if (port_num < start_port(device) || port_num > end_port(device))  		return -EINVAL; @@ -698,18 +707,28 @@ int ib_find_pkey(struct ib_device *device,  {  	int ret, i;  	u16 tmp_pkey; +	int partial_ix = -1;  	for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {  		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);  		if (ret)  			return ret; -  		if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { -			*index = i; -			return 0; +			/* if there is a full-member pkey, take it */ +			if (tmp_pkey & 0x8000) { +				*index = i; +				return 0; +			} +			if (partial_ix < 0) +				partial_ix = i;  		}  	} +	/* no full-member pkey found; take the limited-member one if it exists */ +	if (partial_ix >= 0) { +		*index = partial_ix; +		return 0; +	}  	return -ENOENT;  }  EXPORT_SYMBOL(ib_find_pkey); @@ -718,25 +737,48 @@ static int __init ib_core_init(void)  {  	int ret; +	ib_wq = alloc_workqueue("infiniband", 0, 0); +	if (!ib_wq) +		return -ENOMEM; +  	ret = ib_sysfs_setup(); -	if (ret) +	if (ret) {  		printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); +		goto err; +	} + +	ret = ibnl_init(); +	if (ret) { +		printk(KERN_WARNING "Couldn't init IB netlink interface\n"); +		goto err_sysfs; +	}  	ret = ib_cache_setup();  	if (ret) {  		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n"); -		ib_sysfs_cleanup(); +		goto err_nl;  	} +	return 0; + +err_nl: +	ibnl_cleanup(); + +err_sysfs: +	ib_sysfs_cleanup(); + +err: +	destroy_workqueue(ib_wq);  	return ret;  }  static void __exit ib_core_cleanup(void)  {  	ib_cache_cleanup(); +	ibnl_cleanup();  	ib_sysfs_cleanup();  	/* Make sure that any pending umem accounting work is done. 
*/ -	flush_scheduled_work(); +	destroy_workqueue(ib_wq);  }  module_init(ib_core_init); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 4507043d24c..9f5ad7cc33c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -33,6 +33,7 @@  #include <linux/errno.h>  #include <linux/spinlock.h> +#include <linux/export.h>  #include <linux/slab.h>  #include <linux/jhash.h>  #include <linux/kthread.h> @@ -117,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool,  {  	struct hlist_head *bucket;  	struct ib_pool_fmr *fmr; -	struct hlist_node *pos;  	if (!pool->cache_bucket)  		return NULL;  	bucket = pool->cache_bucket + ib_fmr_hash(*page_list); -	hlist_for_each_entry(fmr, pos, bucket, cache_node) +	hlist_for_each_entry(fmr, bucket, cache_node)  		if (io_virtual_address == fmr->io_virtual_address &&  		    page_list_len      == fmr->page_list_len      &&  		    !memcmp(page_list, fmr->page_list, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 2a1e9ae134b..3d2e489ab73 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -45,6 +45,7 @@  #include <linux/workqueue.h>  #include <linux/completion.h>  #include <linux/slab.h> +#include <linux/module.h>  #include <rdma/iw_cm.h>  #include <rdma/ib_addr.h> @@ -180,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id)  static void rem_ref(struct iw_cm_id *cm_id)  {  	struct iwcm_id_private *cm_id_priv; +	int cb_destroy; +  	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); -	if (iwcm_deref_id(cm_id_priv) && -	    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + +	/* +	 * Test bit before deref in case the cm_id gets freed on another +	 * thread. +	 */ +	cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); +	if (iwcm_deref_id(cm_id_priv) && cb_destroy) {  		BUG_ON(!list_empty(&cm_id_priv->work_list));  		free_cm_id(cm_id_priv);  	} @@ -326,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)  {  	struct iwcm_id_private *cm_id_priv;  	unsigned long flags; -	int ret;  	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);  	/* @@ -342,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)  		cm_id_priv->state = IW_CM_STATE_DESTROYING;  		spin_unlock_irqrestore(&cm_id_priv->lock, flags);  		/* destroy the listening endpoint */ -		ret = cm_id->device->iwcm->destroy_listen(cm_id); +		cm_id->device->iwcm->destroy_listen(cm_id);  		spin_lock_irqsave(&cm_id_priv->lock, flags);  		break;  	case IW_CM_STATE_ESTABLISHED: @@ -623,17 +630,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,  	 */  	BUG_ON(iw_event->status); -	/* -	 * We could be destroying the listening id. If so, ignore this -	 * upcall. -	 */ -	spin_lock_irqsave(&listen_id_priv->lock, flags); -	if (listen_id_priv->state != IW_CM_STATE_LISTEN) { -		spin_unlock_irqrestore(&listen_id_priv->lock, flags); -		goto out; -	} -	spin_unlock_irqrestore(&listen_id_priv->lock, flags); -  	cm_id = iw_create_cm_id(listen_id_priv->id.device,  				listen_id_priv->id.cm_handler,  				listen_id_priv->id.context); @@ -648,6 +644,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,  	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);  	cm_id_priv->state = IW_CM_STATE_CONN_RECV; +	/* +	 * We could be destroying the listening id. If so, ignore this +	 * upcall. 
+	 */ +	spin_lock_irqsave(&listen_id_priv->lock, flags); +	if (listen_id_priv->state != IW_CM_STATE_LISTEN) { +		spin_unlock_irqrestore(&listen_id_priv->lock, flags); +		iw_cm_reject(cm_id, NULL, 0); +		iw_destroy_cm_id(cm_id); +		goto out; +	} +	spin_unlock_irqrestore(&listen_id_priv->lock, flags); +  	ret = alloc_work_entries(cm_id_priv, 3);  	if (ret) {  		iw_cm_reject(cm_id, NULL, 0); @@ -725,7 +734,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,  	 */  	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);  	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); -	if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) { +	if (iw_event->status == 0) {  		cm_id_priv->id.local_addr = iw_event->local_addr;  		cm_id_priv->id.remote_addr = iw_event->remote_addr;  		cm_id_priv->state = IW_CM_STATE_ESTABLISHED; @@ -875,6 +884,8 @@ static void cm_work_handler(struct work_struct *_work)  			}  			return;  		} +		if (empty) +			return;  		spin_lock_irqsave(&cm_id_priv->lock, flags);  	}  	spin_unlock_irqrestore(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c new file mode 100644 index 00000000000..b85ddbc979e --- /dev/null +++ b/drivers/infiniband/core/iwpm_msg.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *        copyright notice, this list of conditions and the following + *        disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *        copyright notice, this list of conditions and the following + *        disclaimer in the documentation and/or other materials + *        provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "iwpm_util.h" + +static const char iwpm_ulib_name[] = "iWarpPortMapperUser"; +static int iwpm_ulib_version = 3; +static int iwpm_user_pid = IWPM_PID_UNDEFINED; +static atomic_t echo_nlmsg_seq; + +int iwpm_valid_pid(void) +{ +	return iwpm_user_pid > 0; +} +EXPORT_SYMBOL(iwpm_valid_pid); + +/* + * iwpm_register_pid - Send a netlink query to user space + *                     for the iwarp port mapper pid + * + * nlmsg attributes: + *	[IWPM_NLA_REG_PID_SEQ] + *	[IWPM_NLA_REG_IF_NAME] + *	[IWPM_NLA_REG_IBDEV_NAME] + *	[IWPM_NLA_REG_ULIB_NAME] + */ +int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto pid_query_error; +	} +	if (iwpm_registered_client(nl_client)) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto pid_query_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto pid_query_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); + +	/* fill in the pid request message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, +				pm_msg->if_name, IWPM_NLA_REG_IF_NAME); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, +				pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE, +				(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME); +	if (ret) +		goto pid_query_error; + +	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", +		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); + +	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_set_registered(nl_client, 1); +		iwpm_user_pid = IWPM_PID_UNAVAILABLE; +		err_str = "Unable to send a nlmsg"; +		goto pid_query_error; +	} +	nlmsg_request->req_buffer = pm_msg; +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +pid_query_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_register_pid); + +/* + * iwpm_add_mapping - Send a netlink add mapping message + *                    to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_MANAGE_MAPPING_SEQ] + *	[IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto add_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto add_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, 
nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto add_mapping_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto add_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); +	/* fill in the add mapping message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_MANAGE_MAPPING_SEQ); +	if (ret) +		goto add_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR); +	if (ret) +		goto add_mapping_error; +	nlmsg_request->req_buffer = pm_msg; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_user_pid = IWPM_PID_UNDEFINED; +		err_str = "Unable to send a nlmsg"; +		goto add_mapping_error; +	} +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +add_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_add_mapping); + +/* + * iwpm_add_and_query_mapping - Send a netlink add and query + *                              mapping message to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_QUERY_MAPPING_SEQ] + *	[IWPM_NLA_QUERY_LOCAL_ADDR] + *	[IWPM_NLA_QUERY_REMOTE_ADDR] + */ +int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto query_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto query_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	ret = -ENOMEM; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto query_mapping_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, +				nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto query_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); + +	/* fill in the query message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_QUERY_MAPPING_SEQ); +	if (ret) +		goto query_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR); +	if (ret) +		goto query_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR); +	if (ret) +		goto query_mapping_error; +	nlmsg_request->req_buffer = pm_msg; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		err_str = "Unable to send a nlmsg"; +		goto query_mapping_error; +	} +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +query_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		
iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping); + +/* + * iwpm_remove_mapping - Send a netlink remove mapping message + *                       to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_MANAGE_MAPPING_SEQ] + *	[IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto remove_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto remove_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); +	if (!skb) { +		ret = -ENOMEM; +		err_str = "Unable to create a nlmsg"; +		goto remove_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_MANAGE_MAPPING_SEQ); +	if (ret) +		goto remove_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				local_addr, IWPM_NLA_MANAGE_ADDR); +	if (ret) +		goto remove_mapping_error; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_user_pid = IWPM_PID_UNDEFINED; +		err_str = "Unable to send a nlmsg"; +		goto remove_mapping_error; +	} +	iwpm_print_sockaddr(local_addr, +			"remove_mapping: Local sockaddr:"); +	return 0; +remove_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb_any(skb); +	return ret; +} +EXPORT_SYMBOL(iwpm_remove_mapping); + +/* netlink attribute policy for the received response to register pid request */ +static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { +	[IWPM_NLA_RREG_PID_SEQ]     = { .type = NLA_U32 }, +	[IWPM_NLA_RREG_IBDEV_NAME]  = { .type = NLA_STRING, +					.len = IWPM_DEVNAME_SIZE - 1 }, +	[IWPM_NLA_RREG_ULIB_NAME]   = { .type = NLA_STRING, +					.len = IWPM_ULIBNAME_SIZE - 1 }, +	[IWPM_NLA_RREG_ULIB_VER]    = { .type = NLA_U16 }, +	[IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 } +}; + +/* + * iwpm_register_pid_cb - Process a port mapper response to + *                        iwpm_register_pid() + */ +int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX]; +	struct iwpm_dev_data *pm_msg; +	char *dev_name, *iwpm_name; +	u32 msg_seq; +	u8 nl_client; +	u16 iwpm_version; +	const char *msg_type = "Register Pid response"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX, +				resp_reg_policy, nltb, msg_type)) +		return -EINVAL; + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +		return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	nl_client = nlmsg_request->nl_client; +	dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]); +	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]); +	iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]); + +	/* check device name, ulib name and version */ +	if 
(strcmp(pm_msg->dev_name, dev_name) || +			strcmp(iwpm_ulib_name, iwpm_name) || +			iwpm_version != iwpm_ulib_version) { + +		pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n", +				__func__, dev_name, iwpm_name, iwpm_version); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto register_pid_response_exit; +	} +	iwpm_user_pid = cb->nlh->nlmsg_pid; +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", +			__func__, iwpm_user_pid); +	if (iwpm_valid_client(nl_client)) +		iwpm_set_registered(nl_client, 1); +register_pid_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found nlmsg_request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_register_pid_cb); + +/* netlink attribute policy for the received response to add mapping request */ +static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { +	[IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 }, +	[IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RMANAGE_MAPPING_ERR]	  = { .type = NLA_U16 } +}; + +/* + * iwpm_add_mapping_cb - Process a port mapper response to + *                       iwpm_add_mapping() + */ +int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_sa_data *pm_msg; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX]; +	struct sockaddr_storage *local_sockaddr; +	struct sockaddr_storage *mapped_sockaddr; +	const char *msg_type; +	u32 msg_seq; + +	msg_type = "Add Mapping response"; +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX, +				resp_add_policy, nltb, msg_type)) +		return -EINVAL; + +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +		return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	local_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_MANAGE_ADDR]); +	mapped_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]); + +	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) { +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto add_mapping_response_exit; +	} +	if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) { +		pr_info("%s: Sockaddr family doesn't match the requested one\n", +				__func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto add_mapping_response_exit; +	} +	memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr, +			sizeof(*mapped_sockaddr)); +	iwpm_print_sockaddr(&pm_msg->loc_addr, +			"add_mapping: Local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, +			"add_mapping: Mapped local sockaddr:"); + +add_mapping_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_add_mapping_cb); + +/* netlink attribute policy for the response to add and query mapping request */ +static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { +	[IWPM_NLA_QUERY_MAPPING_SEQ]   
   = { .type = NLA_U32 }, +	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 } +}; + +/* + * iwpm_add_and_query_mapping_cb - Process a port mapper response to + *                                 iwpm_add_and_query_mapping() + */ +int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, +				struct netlink_callback *cb) +{ +	struct iwpm_sa_data *pm_msg; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; +	struct sockaddr_storage *local_sockaddr, *remote_sockaddr; +	struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; +	const char *msg_type; +	u32 msg_seq; +	u16 err_code; + +	msg_type = "Query Mapping response"; +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, +				resp_query_policy, nltb, msg_type)) +		return -EINVAL; +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +			return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	local_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); +	remote_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); +	mapped_loc_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); +	mapped_rem_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + +	err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]); +	if (err_code == IWPM_REMOTE_QUERY_REJECT) { +		pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n", +			__func__, cb->nlh->nlmsg_pid, msg_seq); +		nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT; +	} +	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) || +		iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) { +		pr_info("%s: Incorrect local sockaddr\n", __func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto query_mapping_response_exit; +	} +	if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || +		mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { +		pr_info("%s: Sockaddr family doesn't match the requested one\n", +				__func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto query_mapping_response_exit; +	} +	memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr, +			sizeof(*mapped_loc_sockaddr)); +	memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr, +			sizeof(*mapped_rem_sockaddr)); + +	iwpm_print_sockaddr(&pm_msg->loc_addr, +			"query_mapping: Local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, +			"query_mapping: Mapped local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->rem_addr, +			"query_mapping: Remote sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_rem_addr, +			"query_mapping: Mapped remote sockaddr:"); +query_mapping_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); + +/* netlink 
attribute policy for the received request for mapping info */ +static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { +	[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, +					.len = IWPM_ULIBNAME_SIZE - 1 }, +	[IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 } +}; + +/* + * iwpm_mapping_info_cb - Process a port mapper request for mapping info + */ +int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; +	const char *msg_type = "Mapping Info response"; +	int iwpm_pid; +	u8 nl_client; +	char *iwpm_name; +	u16 iwpm_version; +	int ret = -EINVAL; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX, +				resp_mapinfo_policy, nltb, msg_type)) { +		pr_info("%s: Unable to parse nlmsg\n", __func__); +		return ret; +	} +	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]); +	iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]); +	if (strcmp(iwpm_ulib_name, iwpm_name) || +			iwpm_version != iwpm_ulib_version) { +		pr_info("%s: Invalid port mapper name = %s version = %d\n", +				__func__, iwpm_name, iwpm_version); +		return ret; +	} +	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); +	if (!iwpm_valid_client(nl_client)) { +		pr_info("%s: Invalid port mapper client = %d\n", +				__func__, nl_client); +		return ret; +	} +	iwpm_set_registered(nl_client, 0); +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	if (!iwpm_mapinfo_available()) +		return 0; +	iwpm_pid = cb->nlh->nlmsg_pid; +	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", +		 __func__, iwpm_pid); +	ret = iwpm_send_mapinfo(nl_client, iwpm_pid); +	return ret; +} +EXPORT_SYMBOL(iwpm_mapping_info_cb); + +/* netlink attribute policy for the received mapping info ack */ +static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { +	[IWPM_NLA_MAPINFO_SEQ]    =   { .type = NLA_U32 }, +	[IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 }, +	[IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 } +}; + +/* + * iwpm_ack_mapping_info_cb - Process a port mapper ack for + *                            the provided mapping info records + */ +int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX]; +	u32 mapinfo_send, mapinfo_ack; +	const char *msg_type = "Mapping Info Ack"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX, +				ack_mapinfo_policy, nltb, msg_type)) +		return -EINVAL; +	mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]); +	mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]); +	if (mapinfo_ack != mapinfo_send) +		pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n", +			__func__, mapinfo_send, mapinfo_ack); +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	return 0; +} +EXPORT_SYMBOL(iwpm_ack_mapping_info_cb); + +/* netlink attribute policy for the received port mapper error message */ +static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { +	[IWPM_NLA_ERR_SEQ]        = { .type = NLA_U32 }, +	[IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 }, +}; + +/* + * iwpm_mapping_error_cb - Process a port mapper error message + */ +int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); +	struct nlattr *nltb[IWPM_NLA_ERR_MAX]; +	u32 msg_seq; +	u16 err_code; +	const char *msg_type = "Mapping Error Msg"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX, +				
map_error_policy, nltb, msg_type)) +		return -EINVAL; + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]); +	err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]); +	pr_info("%s: Received msg seq = %u err code = %u client = %d\n", +				__func__, msg_seq, err_code, nl_client); +	/* look for nlmsg_request */ +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		/* not all errors have associated requests */ +		pr_debug("Could not find matching req (seq = %u)\n", msg_seq); +		return 0; +	} +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	nlmsg_request->err_code = err_code; +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c new file mode 100644 index 00000000000..69e9f84c160 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer in the documentation and/or other materials + *	  provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "iwpm_util.h" + +#define IWPM_HASH_BUCKET_SIZE	512 +#define IWPM_HASH_BUCKET_MASK	(IWPM_HASH_BUCKET_SIZE - 1) + +static LIST_HEAD(iwpm_nlmsg_req_list); +static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); + +static struct hlist_head *iwpm_hash_bucket; +static DEFINE_SPINLOCK(iwpm_mapinfo_lock); + +static DEFINE_MUTEX(iwpm_admin_lock); +static struct iwpm_admin_data iwpm_admin; + +int iwpm_init(u8 nl_client) +{ +	if (iwpm_valid_client(nl_client)) +		return -EINVAL; +	mutex_lock(&iwpm_admin_lock); +	if (atomic_read(&iwpm_admin.refcount) == 0) { +		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * +					sizeof(struct hlist_head), GFP_KERNEL); +		if (!iwpm_hash_bucket) { +			mutex_unlock(&iwpm_admin_lock); +			pr_err("%s Unable to create mapinfo hash table\n", __func__); +			return -ENOMEM; +		} +	} +	atomic_inc(&iwpm_admin.refcount); +	mutex_unlock(&iwpm_admin_lock); +	iwpm_set_valid(nl_client, 1); +	return 0; +} +EXPORT_SYMBOL(iwpm_init); + +static void free_hash_bucket(void); + +int iwpm_exit(u8 nl_client) +{ + +	if (!iwpm_valid_client(nl_client)) +		return -EINVAL; +	mutex_lock(&iwpm_admin_lock); +	if (atomic_read(&iwpm_admin.refcount) == 0) { +		mutex_unlock(&iwpm_admin_lock); +		pr_err("%s Incorrect usage - negative refcount\n", __func__); +		return -EINVAL; +	} +	if (atomic_dec_and_test(&iwpm_admin.refcount)) { +		free_hash_bucket(); +		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); +	} +	mutex_unlock(&iwpm_admin_lock); +	iwpm_set_valid(nl_client, 0); +	return 0; +} +EXPORT_SYMBOL(iwpm_exit); + +static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +					       struct sockaddr_storage *); + +int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, +			struct sockaddr_storage *mapped_sockaddr, +			u8 nl_client) +{ +	struct hlist_head *hash_bucket_head; +	struct iwpm_mapping_info *map_info; +	unsigned long flags; + +	if (!iwpm_valid_client(nl_client)) +		return -EINVAL; +	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); +	if (!map_info) { +		pr_err("%s: Unable to allocate a mapping info\n", __func__); +		return -ENOMEM; +	} +	memcpy(&map_info->local_sockaddr, local_sockaddr, +	       sizeof(struct sockaddr_storage)); +	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, +	       sizeof(struct sockaddr_storage)); +	map_info->nl_client = nl_client; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		hash_bucket_head = get_hash_bucket_head( +					&map_info->local_sockaddr, +					&map_info->mapped_sockaddr); +		hlist_add_head(&map_info->hlist_node, hash_bucket_head); +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return 0; +} +EXPORT_SYMBOL(iwpm_create_mapinfo); + +int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, +			struct sockaddr_storage *mapped_local_addr) +{ +	struct hlist_node *tmp_hlist_node; +	struct hlist_head *hash_bucket_head; +	struct iwpm_mapping_info *map_info = NULL; +	unsigned long flags; +	int ret = -EINVAL; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		hash_bucket_head = get_hash_bucket_head( +					local_sockaddr, +					mapped_local_addr); +		hlist_for_each_entry_safe(map_info, tmp_hlist_node, +					hash_bucket_head, hlist_node) { + +			if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr, +						mapped_local_addr)) { + +				hlist_del_init(&map_info->hlist_node); +				kfree(map_info); +				ret = 0; +				break; +			} +		} +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return ret; +} 
+EXPORT_SYMBOL(iwpm_remove_mapinfo); + +static void free_hash_bucket(void) +{ +	struct hlist_node *tmp_hlist_node; +	struct iwpm_mapping_info *map_info; +	unsigned long flags; +	int i; + +	/* remove all the mapinfo data from the list */ +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +		hlist_for_each_entry_safe(map_info, tmp_hlist_node, +			&iwpm_hash_bucket[i], hlist_node) { + +				hlist_del_init(&map_info->hlist_node); +				kfree(map_info); +			} +	} +	/* free the hash list */ +	kfree(iwpm_hash_bucket); +	iwpm_hash_bucket = NULL; +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +} + +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, +					u8 nl_client, gfp_t gfp) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	unsigned long flags; + +	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); +	if (!nlmsg_request) { +		pr_err("%s Unable to allocate a nlmsg_request\n", __func__); +		return NULL; +	} +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + +	kref_init(&nlmsg_request->kref); +	kref_get(&nlmsg_request->kref); +	nlmsg_request->nlmsg_seq = nlmsg_seq; +	nlmsg_request->nl_client = nl_client; +	nlmsg_request->request_done = 0; +	nlmsg_request->err_code = 0; +	return nlmsg_request; +} + +void iwpm_free_nlmsg_request(struct kref *kref) +{ +	struct iwpm_nlmsg_request *nlmsg_request; +	unsigned long flags; + +	nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref); + +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_del_init(&nlmsg_request->inprocess_list); +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + +	if (!nlmsg_request->request_done) +		pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n", +			__func__, nlmsg_request->nlmsg_seq); +	kfree(nlmsg_request); +} + +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) +{ +	struct iwpm_nlmsg_request *nlmsg_request; +	struct iwpm_nlmsg_request *found_request = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list, +			    inprocess_list) { +		if (nlmsg_request->nlmsg_seq == echo_seq) { +			found_request = nlmsg_request; +			kref_get(&nlmsg_request->kref); +			break; +		} +	} +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); +	return found_request; +} + +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) +{ +	int ret; +	init_waitqueue_head(&nlmsg_request->waitq); + +	ret = wait_event_timeout(nlmsg_request->waitq, +			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); +	if (!ret) { +		ret = -EINVAL; +		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", +			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); +	} else { +		ret = nlmsg_request->err_code; +	} +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	return ret; +} + +int iwpm_get_nlmsg_seq(void) +{ +	return atomic_inc_return(&iwpm_admin.nlmsg_seq); +} + +int iwpm_valid_client(u8 nl_client) +{ +	if (nl_client >= RDMA_NL_NUM_CLIENTS) +		return 0; +	return iwpm_admin.client_list[nl_client]; +} + +void iwpm_set_valid(u8 nl_client, int valid) +{ +	if (nl_client >= RDMA_NL_NUM_CLIENTS) +		return; +	iwpm_admin.client_list[nl_client] = valid; +} + +/* valid client */ +int iwpm_registered_client(u8 nl_client) +{ +	return iwpm_admin.reg_list[nl_client]; +} + +/* valid client */ +void 
iwpm_set_registered(u8 nl_client, int reg) +{ +	iwpm_admin.reg_list[nl_client] = reg; +} + +int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, +				struct sockaddr_storage *b_sockaddr) +{ +	if (a_sockaddr->ss_family != b_sockaddr->ss_family) +		return 1; +	if (a_sockaddr->ss_family == AF_INET) { +		struct sockaddr_in *a4_sockaddr = +			(struct sockaddr_in *)a_sockaddr; +		struct sockaddr_in *b4_sockaddr = +			(struct sockaddr_in *)b_sockaddr; +		if (!memcmp(&a4_sockaddr->sin_addr, +			&b4_sockaddr->sin_addr, sizeof(struct in_addr)) +			&& a4_sockaddr->sin_port == b4_sockaddr->sin_port) +				return 0; + +	} else if (a_sockaddr->ss_family == AF_INET6) { +		struct sockaddr_in6 *a6_sockaddr = +			(struct sockaddr_in6 *)a_sockaddr; +		struct sockaddr_in6 *b6_sockaddr = +			(struct sockaddr_in6 *)b_sockaddr; +		if (!memcmp(&a6_sockaddr->sin6_addr, +			&b6_sockaddr->sin6_addr, sizeof(struct in6_addr)) +			&& a6_sockaddr->sin6_port == b6_sockaddr->sin6_port) +				return 0; + +	} else { +		pr_err("%s: Invalid sockaddr family\n", __func__); +	} +	return 1; +} + +struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, +						int nl_client) +{ +	struct sk_buff *skb = NULL; + +	skb = dev_alloc_skb(NLMSG_GOODSIZE); +	if (!skb) { +		pr_err("%s Unable to allocate skb\n", __func__); +		goto create_nlmsg_exit; +	} +	if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, +			   NLM_F_REQUEST))) { +		pr_warn("%s: Unable to put the nlmsg header\n", __func__); +		dev_kfree_skb(skb); +		skb = NULL; +	} +create_nlmsg_exit: +	return skb; +} + +int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, +				   const struct nla_policy *nlmsg_policy, +				   struct nlattr *nltb[], const char *msg_type) +{ +	int nlh_len = 0; +	int ret; +	const char *err_str = ""; + +	ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy); +	if (ret) { +		err_str = "Invalid attribute"; +		goto parse_nlmsg_error; +	} +	ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy); +	if (ret) { +		err_str = "Unable to parse the nlmsg"; +		goto parse_nlmsg_error; +	} +	ret = iwpm_validate_nlmsg_attr(nltb, policy_max); +	if (ret) { +		err_str = "Invalid NULL attribute"; +		goto parse_nlmsg_error; +	} +	return 0; +parse_nlmsg_error: +	pr_warn("%s: %s (msg type %s ret = %d)\n", +			__func__, err_str, msg_type, ret); +	return ret; +} + +void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg) +{ +	struct sockaddr_in6 *sockaddr_v6; +	struct sockaddr_in *sockaddr_v4; + +	switch (sockaddr->ss_family) { +	case AF_INET: +		sockaddr_v4 = (struct sockaddr_in *)sockaddr; +		pr_debug("%s IPV4 %pI4: %u(0x%04X)\n", +			msg, &sockaddr_v4->sin_addr, +			ntohs(sockaddr_v4->sin_port), +			ntohs(sockaddr_v4->sin_port)); +		break; +	case AF_INET6: +		sockaddr_v6 = (struct sockaddr_in6 *)sockaddr; +		pr_debug("%s IPV6 %pI6: %u(0x%04X)\n", +			msg, &sockaddr_v6->sin6_addr, +			ntohs(sockaddr_v6->sin6_port), +			ntohs(sockaddr_v6->sin6_port)); +		break; +	default: +		break; +	} +} + +static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr) +{ +	u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0); +	u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0); +	return hash; +} + +static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) +{ +	u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0); +	u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0); +	return hash; +} + +static struct hlist_head 
*get_hash_bucket_head(struct sockaddr_storage +					       *local_sockaddr, +					       struct sockaddr_storage +					       *mapped_sockaddr) +{ +	u32 local_hash, mapped_hash, hash; + +	if (local_sockaddr->ss_family == AF_INET) { +		local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); +		mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + +	} else if (local_sockaddr->ss_family == AF_INET6) { +		local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); +		mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); +	} else { +		pr_err("%s: Invalid sockaddr family\n", __func__); +		return NULL; +	} + +	if (local_hash == mapped_hash) /* if port mapper isn't available */ +		hash = local_hash; +	else +		hash = jhash_2words(local_hash, mapped_hash, 0); + +	return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; +} + +static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) +{ +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto mapinfo_num_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	msg_seq = 0; +	err_str = "Unable to put attribute of mapinfo number nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ); +	if (ret) +		goto mapinfo_num_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), +				&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); +	if (ret) +		goto mapinfo_num_error; +	ret = ibnl_unicast(skb, nlh, iwpm_pid); +	if (ret) { +		skb = NULL; +		err_str = "Unable to send a nlmsg"; +		goto mapinfo_num_error; +	} +	pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num); +	return 0; +mapinfo_num_error: +	pr_info("%s: %s\n", __func__, err_str); +	if (skb) +		dev_kfree_skb(skb); +	return ret; +} + +static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) +{ +	struct nlmsghdr *nlh = NULL; +	int ret = 0; + +	if (!skb) +		return ret; +	if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, +			   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { +		pr_warn("%s Unable to put NLMSG_DONE\n", __func__); +		return -ENOMEM; +	} +	nlh->nlmsg_type = NLMSG_DONE; +	ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid); +	if (ret) +		pr_warn("%s Unable to send a nlmsg\n", __func__); +	return ret; +} + +int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) +{ +	struct iwpm_mapping_info *map_info; +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	int skb_num = 0, mapping_num = 0; +	int i = 0, nlmsg_bytes = 0; +	unsigned long flags; +	const char *err_str = ""; +	int ret; + +	skb = dev_alloc_skb(NLMSG_GOODSIZE); +	if (!skb) { +		ret = -ENOMEM; +		err_str = "Unable to allocate skb"; +		goto send_mapping_info_exit; +	} +	skb_num++; +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], +				     hlist_node) { +			if (map_info->nl_client != nl_client) +				continue; +			nlh = NULL; +			if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, +					RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { +				ret = -ENOMEM; +				err_str = "Unable to put the nlmsg header"; +				goto send_mapping_info_unlock; +			} +			err_str = "Unable to put attribute of the nlmsg"; +			ret = ibnl_put_attr(skb, nlh, +					sizeof(struct sockaddr_storage), +					&map_info->local_sockaddr, +					IWPM_NLA_MAPINFO_LOCAL_ADDR); +			if (ret) +				
goto send_mapping_info_unlock; + +			ret = ibnl_put_attr(skb, nlh, +					sizeof(struct sockaddr_storage), +					&map_info->mapped_sockaddr, +					IWPM_NLA_MAPINFO_MAPPED_ADDR); +			if (ret) +				goto send_mapping_info_unlock; + +			iwpm_print_sockaddr(&map_info->local_sockaddr, +				"send_mapping_info: Local sockaddr:"); +			iwpm_print_sockaddr(&map_info->mapped_sockaddr, +				"send_mapping_info: Mapped local sockaddr:"); +			mapping_num++; +			nlmsg_bytes += nlh->nlmsg_len; + +			/* check if all mappings can fit in one skb */ +			if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) { +				/* and leave room for NLMSG_DONE */ +				nlmsg_bytes = 0; +				skb_num++; +				spin_unlock_irqrestore(&iwpm_mapinfo_lock, +						       flags); +				/* send the skb */ +				ret = send_nlmsg_done(skb, nl_client, iwpm_pid); +				skb = NULL; +				if (ret) { +					err_str = "Unable to send map info"; +					goto send_mapping_info_exit; +				} +				if (skb_num == IWPM_MAPINFO_SKB_COUNT) { +					ret = -ENOMEM; +					err_str = "Insufficient skbs for map info"; +					goto send_mapping_info_exit; +				} +				skb = dev_alloc_skb(NLMSG_GOODSIZE); +				if (!skb) { +					ret = -ENOMEM; +					err_str = "Unable to allocate skb"; +					goto send_mapping_info_exit; +				} +				spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +			} +		} +	} +send_mapping_info_unlock: +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +send_mapping_info_exit: +	if (ret) { +		pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret); +		if (skb) +			dev_kfree_skb(skb); +		return ret; +	} +	send_nlmsg_done(skb, nl_client, iwpm_pid); +	return send_mapinfo_num(mapping_num, nl_client, iwpm_pid); +} + +int iwpm_mapinfo_available(void) +{ +	unsigned long flags; +	int full_bucket = 0, i = 0; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +			if (!hlist_empty(&iwpm_hash_bucket[i])) { +				full_bucket = 1; +				break; +			} +		} +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return full_bucket; +} diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h new file mode 100644 index 00000000000..9777c869a14 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer in the documentation and/or other materials + *	  provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _IWPM_UTIL_H +#define _IWPM_UTIL_H + +#include <linux/module.h> +#include <linux/io.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/delay.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/jhash.h> +#include <linux/kref.h> +#include <net/netlink.h> +#include <linux/errno.h> +#include <rdma/iw_portmap.h> +#include <rdma/rdma_netlink.h> + + +#define IWPM_NL_RETRANS		3 +#define IWPM_NL_TIMEOUT		(10*HZ) +#define IWPM_MAPINFO_SKB_COUNT	20 + +#define IWPM_PID_UNDEFINED     -1 +#define IWPM_PID_UNAVAILABLE   -2 + +struct iwpm_nlmsg_request { +	struct list_head    inprocess_list; +	__u32               nlmsg_seq; +	void                *req_buffer; +	u8	            nl_client; +	u8                  request_done; +	u16                 err_code; +	wait_queue_head_t   waitq; +	struct kref         kref; +}; + +struct iwpm_mapping_info { +	struct hlist_node hlist_node; +	struct sockaddr_storage local_sockaddr; +	struct sockaddr_storage mapped_sockaddr; +	u8     nl_client; +}; + +struct iwpm_admin_data { +	atomic_t refcount; +	atomic_t nlmsg_seq; +	int      client_list[RDMA_NL_NUM_CLIENTS]; +	int      reg_list[RDMA_NL_NUM_CLIENTS]; +}; + +/** + * iwpm_get_nlmsg_request - Allocate and initialize netlink message request + * @nlmsg_seq: Sequence number of the netlink message + * @nl_client: The index of the netlink client + * @gfp: Indicates how the memory for the request should be allocated + * + * Returns the newly allocated netlink request object if successful, + * otherwise returns NULL + */ +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, +						u8 nl_client, gfp_t gfp); + +/** + * iwpm_free_nlmsg_request - Deallocate netlink message request + * @kref: Holds reference of netlink message request + */ +void iwpm_free_nlmsg_request(struct kref *kref); + +/** + * iwpm_find_nlmsg_request - Find netlink message request in the request list + * @echo_seq: Sequence number of the netlink request to find + * + * Returns the found netlink message request, + * if not found, returns NULL + */ +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq); + +/** + * iwpm_wait_complete_req - Block while servicing the netlink request + * @nlmsg_request: Netlink message request to service + * + * Wakes up, after the request is completed or expired + * Returns 0 if the request is complete without error + */ +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); + +/** + * iwpm_get_nlmsg_seq - Get the sequence number for a netlink + *			message to send to the port mapper + * + * Returns the sequence number for the netlink message. 
+ */
+int iwpm_get_nlmsg_seq(void);
+
+/**
+ * iwpm_valid_client - Check if the port mapper client is valid
+ * @nl_client: The index of the netlink client
+ *
+ * Valid clients need to call iwpm_init() before using
+ * the port mapper
+ */
+int iwpm_valid_client(u8 nl_client);
+
+/**
+ * iwpm_set_valid - Set the port mapper client to valid or not
+ * @nl_client: The index of the netlink client
+ * @valid: 1 if valid or 0 if invalid
+ */
+void iwpm_set_valid(u8 nl_client, int valid);
+
+/**
+ * iwpm_registered_client - Check if the port mapper client is registered
+ * @nl_client: The index of the netlink client
+ *
+ * Call iwpm_register_pid() to register a client
+ */
+int iwpm_registered_client(u8 nl_client);
+
+/**
+ * iwpm_set_registered - Set the port mapper client to registered or not
+ * @nl_client: The index of the netlink client
+ * @reg: 1 if registered or 0 if not
+ */
+void iwpm_set_registered(u8 nl_client, int reg);
+
+/**
+ * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of
+ *                     a client to the user space port mapper
+ * @nl_client: The index of the netlink client
+ * @iwpm_pid: The pid of the user space port mapper
+ *
+ * If successful, returns the number of sent mapping info records
+ */
+int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid);
+
+/**
+ * iwpm_mapinfo_available - Check if any mapping info records are available
+ *		            in the hash table
+ *
+ * Returns 1 if mapping information is available, otherwise returns 0
+ */
+int iwpm_mapinfo_available(void);
+
+/**
+ * iwpm_compare_sockaddr - Compare two sockaddr storage structs
+ *
+ * Returns 0 if they are holding the same IP/TCP address info,
+ * otherwise returns 1
+ */
+int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr,
+			struct sockaddr_storage *b_sockaddr);
+
+/**
+ * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes
+ * @nltb: Holds the address of each netlink message attribute
+ * @nla_count: Number of netlink message attributes
+ *
+ * Returns an error if any of the nla_count attributes is NULL
+ */
+static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[],
+					   int nla_count)
+{
+	int i;
+	for (i = 1; i < nla_count; i++) {
+		if (!nltb[i])
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/**
+ * iwpm_create_nlmsg - Allocate skb and form a netlink message
+ * @nl_op: Netlink message opcode
+ * @nlh: Holds the address of the netlink message header in skb
+ * @nl_client: The index of the netlink client
+ *
+ * Returns the newly allocated skb, or NULL if the tailroom of the skb
+ * is insufficient to store the message header and payload
+ */
+struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
+					int nl_client);
+
+/**
+ * iwpm_parse_nlmsg - Validate and parse the received netlink message
+ * @cb: Netlink callback structure
+ * @policy_max: Maximum attribute type to be expected
+ * @nlmsg_policy: Validation policy
+ * @nltb: Array to store policy_max parsed elements
+ * @msg_type: Type of netlink message
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
+				const struct nla_policy *nlmsg_policy,
+				struct nlattr *nltb[], const char *msg_type);
+
+/**
+ * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port
+ * @sockaddr: Socket address to print
+ * @msg: Message to print
+ */
+void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+#endif
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 
822cfdcd9f7..ab31f136d04 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -35,6 +35,7 @@   */  #include <linux/dma-mapping.h>  #include <linux/slab.h> +#include <linux/module.h>  #include <rdma/ib_cache.h>  #include "mad_priv.h" @@ -276,6 +277,13 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,  		goto error1;  	} +	/* Verify the QP requested is supported.  For example, Ethernet devices +	 * will not have QP0 */ +	if (!port_priv->qp_info[qpn].qp) { +		ret = ERR_PTR(-EPROTONOSUPPORT); +		goto error1; +	} +  	/* Allocate structures */  	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);  	if (!mad_agent_priv) { @@ -1014,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)  					mad_send_wr->send_buf.mad,  					sge[0].length,  					DMA_TO_DEVICE); +	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) +		return -ENOMEM; +  	mad_send_wr->header_mapping = sge[0].addr;  	sge[1].addr = ib_dma_map_single(mad_agent->device,  					ib_get_payload(mad_send_wr),  					sge[1].length,  					DMA_TO_DEVICE); +	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { +		ib_dma_unmap_single(mad_agent->device, +				    mad_send_wr->header_mapping, +				    sge[0].length, DMA_TO_DEVICE); +		return -ENOMEM; +	}  	mad_send_wr->payload_mapping = sge[1].addr;  	spin_lock_irqsave(&qp_info->send_queue.lock, flags); @@ -1589,6 +1606,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,  					mad->mad_hdr.class_version].class;  			if (!class)  				goto out; +			if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >= +			    IB_MGMT_MAX_METHODS) +				goto out;  			method = class->method_table[convert_mgmt_class(  							mad->mad_hdr.mgmt_class)];  			if (method) @@ -1831,6 +1851,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,  	}  } +static bool generate_unmatched_resp(struct ib_mad_private *recv, +				    struct ib_mad_private *response) +{ +	if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || +	    recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { +		memcpy(response, recv, sizeof *response); +		response->header.recv_wc.wc = &response->header.wc; +		response->header.recv_wc.recv_buf.mad = &response->mad.mad; +		response->header.recv_wc.recv_buf.grh = &response->grh; +		response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; +		response->mad.mad.mad_hdr.status = +			cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); +		if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) +			response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + +		return true; +	} else { +		return false; +	} +}  static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,  				     struct ib_wc *wc)  { @@ -1840,6 +1880,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,  	struct ib_mad_list_head *mad_list;  	struct ib_mad_agent_private *mad_agent;  	int port_num; +	int ret = IB_MAD_RESULT_SUCCESS;  	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;  	qp_info = mad_list->mad_queue->qp_info; @@ -1923,8 +1964,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,  local:  	/* Give driver "right of first refusal" on incoming MAD */  	if (port_priv->device->process_mad) { -		int ret; -  		ret = port_priv->device->process_mad(port_priv->device, 0,  						     port_priv->port_num,  						     wc, &recv->grh, @@ -1952,6 +1991,10 @@ local:  		 * or via recv_handler in 
ib_mad_complete_recv()  		 */  		recv = NULL; +	} else if ((ret & IB_MAD_RESULT_SUCCESS) && +		   generate_unmatched_resp(recv, response)) { +		agent_send_response(&response->mad.mad, &recv->grh, wc, +				    port_priv->device, port_num, qp_info->qp->qp_num);  	}  out: @@ -1970,7 +2013,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)  	unsigned long delay;  	if (list_empty(&mad_agent_priv->wait_list)) { -		__cancel_delayed_work(&mad_agent_priv->timed_work); +		cancel_delayed_work(&mad_agent_priv->timed_work);  	} else {  		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,  					 struct ib_mad_send_wr_private, @@ -1979,13 +2022,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)  		if (time_after(mad_agent_priv->timeout,  			       mad_send_wr->timeout)) {  			mad_agent_priv->timeout = mad_send_wr->timeout; -			__cancel_delayed_work(&mad_agent_priv->timed_work);  			delay = mad_send_wr->timeout - jiffies;  			if ((long)delay <= 0)  				delay = 1; -			queue_delayed_work(mad_agent_priv->qp_info-> -					   port_priv->wq, -					   &mad_agent_priv->timed_work, delay); +			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, +					 &mad_agent_priv->timed_work, delay);  		}  	}  } @@ -2018,11 +2059,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)  	list_add(&mad_send_wr->agent_list, list_item);  	/* Reschedule a work item if we have a shorter timeout */ -	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { -		__cancel_delayed_work(&mad_agent_priv->timed_work); -		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, -				   &mad_agent_priv->timed_work, delay); -	} +	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) +		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, +				 &mad_agent_priv->timed_work, delay);  }  void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, @@ -2560,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,  						 sizeof *mad_priv -  						   sizeof mad_priv->header,  						 DMA_FROM_DEVICE); +		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, +						  sg_list.addr))) { +			ret = -ENOMEM; +			break; +		}  		mad_priv->header.mapping = sg_list.addr;  		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;  		mad_priv->header.mad_list.mad_queue = recv_queue; @@ -2633,6 +2677,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)  	int ret, i;  	struct ib_qp_attr *attr;  	struct ib_qp *qp; +	u16 pkey_index;  	attr = kmalloc(sizeof *attr, GFP_KERNEL);  	if (!attr) { @@ -2640,6 +2685,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)  		return -ENOMEM;  	} +	ret = ib_find_pkey(port_priv->device, port_priv->port_num, +			   IB_DEFAULT_PKEY_FULL, &pkey_index); +	if (ret) +		pkey_index = 0; +  	for (i = 0; i < IB_MAD_QPS_CORE; i++) {  		qp = port_priv->qp_info[i].qp;  		if (!qp) @@ -2650,7 +2700,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)  		 * one is needed for the Reset to Init transition  		 */  		attr->qp_state = IB_QPS_INIT; -		attr->pkey_index = 0; +		attr->pkey_index = pkey_index;  		attr->qkey = (qp->qp_num == 0) ? 
0 : IB_QP1_QKEY;  		ret = ib_modify_qp(qp, attr, IB_QP_STATE |  					     IB_QP_PKEY_INDEX | IB_QP_QKEY); diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 68b4162fd9d..d2360a8ef0b 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -34,6 +34,7 @@  #include <linux/dma-mapping.h>  #include <linux/err.h>  #include <linux/interrupt.h> +#include <linux/export.h>  #include <linux/slab.h>  #include <linux/bitops.h>  #include <linux/random.h> diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c new file mode 100644 index 00000000000..23dd5a5c759 --- /dev/null +++ b/drivers/infiniband/core/netlink.c @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2010 Voltaire Inc.  All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *        copyright notice, this list of conditions and the following + *        disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *        copyright notice, this list of conditions and the following + *        disclaimer in the documentation and/or other materials + *        provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ + +#include <linux/export.h> +#include <net/netlink.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <rdma/rdma_netlink.h> + +struct ibnl_client { +	struct list_head		list; +	int				index; +	int				nops; +	const struct ibnl_client_cbs   *cb_table; +}; + +static DEFINE_MUTEX(ibnl_mutex); +static struct sock *nls; +static LIST_HEAD(client_list); + +int ibnl_add_client(int index, int nops, +		    const struct ibnl_client_cbs cb_table[]) +{ +	struct ibnl_client *cur; +	struct ibnl_client *nl_client; + +	nl_client = kmalloc(sizeof *nl_client, GFP_KERNEL); +	if (!nl_client) +		return -ENOMEM; + +	nl_client->index	= index; +	nl_client->nops		= nops; +	nl_client->cb_table	= cb_table; + +	mutex_lock(&ibnl_mutex); + +	list_for_each_entry(cur, &client_list, list) { +		if (cur->index == index) { +			pr_warn("Client for %d already exists\n", index); +			mutex_unlock(&ibnl_mutex); +			kfree(nl_client); +			return -EINVAL; +		} +	} + +	list_add_tail(&nl_client->list, &client_list); + +	mutex_unlock(&ibnl_mutex); + +	return 0; +} +EXPORT_SYMBOL(ibnl_add_client); + +int ibnl_remove_client(int index) +{ +	struct ibnl_client *cur, *next; + +	mutex_lock(&ibnl_mutex); +	list_for_each_entry_safe(cur, next, &client_list, list) { +		if (cur->index == index) { +			list_del(&(cur->list)); +			mutex_unlock(&ibnl_mutex); +			kfree(cur); +			return 0; +		} +	} +	pr_warn("Can't remove callback for client idx %d. Not found\n", index); +	mutex_unlock(&ibnl_mutex); + +	return -EINVAL; +} +EXPORT_SYMBOL(ibnl_remove_client); + +void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, +		   int len, int client, int op, int flags) +{ +	unsigned char *prev_tail; + +	prev_tail = skb_tail_pointer(skb); +	*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), +			 len, flags); +	if (!*nlh) +		goto out_nlmsg_trim; +	(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; +	return nlmsg_data(*nlh); + +out_nlmsg_trim: +	nlmsg_trim(skb, prev_tail); +	return NULL; +} +EXPORT_SYMBOL(ibnl_put_msg); + +int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, +		  int len, void *data, int type) +{ +	unsigned char *prev_tail; + +	prev_tail = skb_tail_pointer(skb); +	if (nla_put(skb, type, len, data)) +		goto nla_put_failure; +	nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; +	return 0; + +nla_put_failure: +	nlmsg_trim(skb, prev_tail - nlh->nlmsg_len); +	return -EMSGSIZE; +} +EXPORT_SYMBOL(ibnl_put_attr); + +static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ +	struct ibnl_client *client; +	int type = nlh->nlmsg_type; +	int index = RDMA_NL_GET_CLIENT(type); +	int op = RDMA_NL_GET_OP(type); + +	list_for_each_entry(client, &client_list, list) { +		if (client->index == index) { +			if (op < 0 || op >= client->nops || +			    !client->cb_table[op].dump) +				return -EINVAL; + +			{ +				struct netlink_dump_control c = { +					.dump = client->cb_table[op].dump, +					.module = client->cb_table[op].module, +				}; +				return netlink_dump_start(nls, skb, nlh, &c); +			} +		} +	} + +	pr_info("Index %d wasn't found in client list\n", index); +	return -EINVAL; +} + +static void ibnl_rcv(struct sk_buff *skb) +{ +	mutex_lock(&ibnl_mutex); +	netlink_rcv_skb(skb, &ibnl_rcv_msg); +	mutex_unlock(&ibnl_mutex); +} + +int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, +			__u32 pid) +{ +	return nlmsg_unicast(nls, skb, pid); +} +EXPORT_SYMBOL(ibnl_unicast); + +int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr 
*nlh, +			unsigned int group, gfp_t flags) +{ +	return nlmsg_multicast(nls, skb, 0, group, flags); +} +EXPORT_SYMBOL(ibnl_multicast); + +int __init ibnl_init(void) +{ +	struct netlink_kernel_cfg cfg = { +		.input	= ibnl_rcv, +	}; + +	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); +	if (!nls) { +		pr_warn("Failed to create netlink socket\n"); +		return -ENOMEM; +	} + +	return 0; +} + +void ibnl_cleanup(void) +{ +	struct ibnl_client *cur, *next; + +	mutex_lock(&ibnl_mutex); +	list_for_each_entry_safe(cur, next, &client_list, list) { +		list_del(&(cur->list)); +		kfree(cur); +	} +	mutex_unlock(&ibnl_mutex); + +	netlink_kernel_release(nls); +} diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 019bd4b0863..1b65986c0be 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -31,6 +31,7 @@   * SOFTWARE.   */ +#include <linux/export.h>  #include <linux/string.h>  #include <rdma/ib_pack.h> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 91a660310b7..233eaf541f5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -42,7 +42,7 @@  #include <linux/kref.h>  #include <linux/idr.h>  #include <linux/workqueue.h> - +#include <uapi/linux/if_ether.h>  #include <rdma/ib_pack.h>  #include <rdma/ib_cache.h>  #include "sa.h" @@ -94,6 +94,12 @@ struct ib_sa_path_query {  	struct ib_sa_query sa_query;  }; +struct ib_sa_guidinfo_query { +	void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); +	void *context; +	struct ib_sa_query sa_query; +}; +  struct ib_sa_mcmember_query {  	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);  	void *context; @@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = {  	  .size_bits    = 2*64 },  }; +#define GUIDINFO_REC_FIELD(field) \ +	.struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),	\ +	.struct_size_bytes   = sizeof((struct ib_sa_guidinfo_rec *) 0)->field,	\ +	.field_name          = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { +	{ GUIDINFO_REC_FIELD(lid), +	  .offset_words = 0, +	  .offset_bits  = 0, +	  .size_bits    = 16 }, +	{ GUIDINFO_REC_FIELD(block_num), +	  .offset_words = 0, +	  .offset_bits  = 16, +	  .size_bits    = 8 }, +	{ GUIDINFO_REC_FIELD(res1), +	  .offset_words = 0, +	  .offset_bits  = 24, +	  .size_bits    = 8 }, +	{ GUIDINFO_REC_FIELD(res2), +	  .offset_words = 1, +	  .offset_bits  = 0, +	  .size_bits    = 32 }, +	{ GUIDINFO_REC_FIELD(guid_info_list), +	  .offset_words = 2, +	  .offset_bits  = 0, +	  .size_bits    = 512 }, +}; +  static void free_sm_ah(struct kref *kref)  {  	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -425,7 +459,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event  		port->sm_ah = NULL;  		spin_unlock_irqrestore(&port->ah_lock, flags); -		schedule_work(&sa_dev->port[event->element.port_num - +		queue_work(ib_wq, &sa_dev->port[event->element.port_num -  					    sa_dev->start_port].update_task);  	}  } @@ -522,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,  		ah_attr->grh.hop_limit     = rec->hop_limit;  		ah_attr->grh.traffic_class = rec->traffic_class;  	} +	if (force_grh) { +		memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); +		ah_attr->vlan_id = rec->vlan_id; +	} else { +		ah_attr->vlan_id = 0xffff; +	} +  	return 0;  }  EXPORT_SYMBOL(ib_init_ah_from_path); @@ -577,19 +618,21 @@ static void 
init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)  static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)  { +	bool preload = !!(gfp_mask & __GFP_WAIT);  	unsigned long flags;  	int ret, id; -retry: -	if (!idr_pre_get(&query_idr, gfp_mask)) -		return -ENOMEM; +	if (preload) +		idr_preload(gfp_mask);  	spin_lock_irqsave(&idr_lock, flags); -	ret = idr_get_new(&query_idr, query, &id); + +	id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); +  	spin_unlock_irqrestore(&idr_lock, flags); -	if (ret == -EAGAIN) -		goto retry; -	if (ret) -		return ret; +	if (preload) +		idr_preload_end(); +	if (id < 0) +		return id;  	query->mad_buf->timeout_ms  = timeout_ms;  	query->mad_buf->context[0] = query; @@ -616,6 +659,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)  }  EXPORT_SYMBOL(ib_sa_unpack_path); +void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) +{ +	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); +} +EXPORT_SYMBOL(ib_sa_pack_path); +  static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,  				    int status,  				    struct ib_sa_mad *mad) @@ -628,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,  		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),  			  mad->data, &rec); +		rec.vlan_id = 0xffff; +		memset(rec.dmac, 0, ETH_ALEN); +		memset(rec.smac, 0, ETH_ALEN);  		query->callback(status, &rec, query->context);  	} else  		query->callback(status, NULL, query->context); @@ -945,6 +997,105 @@ err1:  	return ret;  } +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, +					int status, +					struct ib_sa_mad *mad) +{ +	struct ib_sa_guidinfo_query *query = +		container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + +	if (mad) { +		struct ib_sa_guidinfo_rec rec; + +		ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), +			  mad->data, &rec); +		query->callback(status, &rec, query->context); +	} else +		query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ +	kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, +			      struct ib_device *device, u8 port_num, +			      struct ib_sa_guidinfo_rec *rec, +			      ib_sa_comp_mask comp_mask, u8 method, +			      int timeout_ms, gfp_t gfp_mask, +			      void (*callback)(int status, +					       struct ib_sa_guidinfo_rec *resp, +					       void *context), +			      void *context, +			      struct ib_sa_query **sa_query) +{ +	struct ib_sa_guidinfo_query *query; +	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); +	struct ib_sa_port *port; +	struct ib_mad_agent *agent; +	struct ib_sa_mad *mad; +	int ret; + +	if (!sa_dev) +		return -ENODEV; + +	if (method != IB_MGMT_METHOD_GET && +	    method != IB_MGMT_METHOD_SET && +	    method != IB_SA_METHOD_DELETE) { +		return -EINVAL; +	} + +	port  = &sa_dev->port[port_num - sa_dev->start_port]; +	agent = port->agent; + +	query = kmalloc(sizeof *query, gfp_mask); +	if (!query) +		return -ENOMEM; + +	query->sa_query.port = port; +	ret = alloc_mad(&query->sa_query, gfp_mask); +	if (ret) +		goto err1; + +	ib_sa_client_get(client); +	query->sa_query.client = client; +	query->callback        = callback; +	query->context         = context; + +	mad = query->sa_query.mad_buf->mad; +	init_mad(mad, agent); + +	query->sa_query.callback = callback ? 
ib_sa_guidinfo_rec_callback : NULL; +	query->sa_query.release  = ib_sa_guidinfo_rec_release; + +	mad->mad_hdr.method	 = method; +	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); +	mad->sa_hdr.comp_mask	 = comp_mask; + +	ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, +		mad->data); + +	*sa_query = &query->sa_query; + +	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); +	if (ret < 0) +		goto err2; + +	return ret; + +err2: +	*sa_query = NULL; +	ib_sa_client_put(query->sa_query.client); +	free_mad(&query->sa_query); + +err1: +	kfree(query); +	return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); +  static void send_handler(struct ib_mad_agent *agent,  			 struct ib_mad_send_wc *mad_send_wc)  { @@ -1079,7 +1230,7 @@ static void ib_sa_remove_one(struct ib_device *device)  	ib_unregister_event_handler(&sa_dev->event_handler); -	flush_scheduled_work(); +	flush_workqueue(ib_wq);  	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {  		if (rdma_port_get_link_layer(device, i + 1) == IB_LINK_LAYER_INFINIBAND) { diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 9ab5df72df7..cbd0383f622 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -35,6 +35,7 @@  #include "core_priv.h"  #include <linux/slab.h> +#include <linux/stat.h>  #include <linux/string.h>  #include <rdma/ib_mad.h> @@ -178,7 +179,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,  {  	struct ib_port_attr attr;  	char *speed = ""; -	int rate; +	int rate;		/* in deci-Gb/sec */  	ssize_t ret;  	ret = ib_query_port(p->ibdev, p->port_num, &attr); @@ -186,11 +187,33 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,  		return ret;  	switch (attr.active_speed) { -	case 2: speed = " DDR"; break; -	case 4: speed = " QDR"; break; +	case IB_SPEED_DDR: +		speed = " DDR"; +		rate = 50; +		break; +	case IB_SPEED_QDR: +		speed = " QDR"; +		rate = 100; +		break; +	case IB_SPEED_FDR10: +		speed = " FDR10"; +		rate = 100; +		break; +	case IB_SPEED_FDR: +		speed = " FDR"; +		rate = 140; +		break; +	case IB_SPEED_EDR: +		speed = " EDR"; +		rate = 250; +		break; +	case IB_SPEED_SDR: +	default:		/* default to SDR for invalid rates */ +		rate = 25; +		break;  	} -	rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed; +	rate *= ib_width_enum_to_int(attr.active_width);  	if (rate < 0)  		return -EINVAL; @@ -406,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)  	struct attribute *a;  	int i; -	for (i = 0; (a = p->gid_group.attrs[i]); ++i) -		kfree(a); +	if (p->gid_group.attrs) { +		for (i = 0; (a = p->gid_group.attrs[i]); ++i) +			kfree(a); -	kfree(p->gid_group.attrs); +		kfree(p->gid_group.attrs); +	} -	for (i = 0; (a = p->pkey_group.attrs[i]); ++i) -		kfree(a); +	if (p->pkey_group.attrs) { +		for (i = 0; (a = p->pkey_group.attrs[i]); ++i) +			kfree(a); -	kfree(p->pkey_group.attrs); +		kfree(p->pkey_group.attrs); +	}  	kfree(p);  } @@ -511,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,  	p->port_num   = port_num;  	ret = kobject_init_and_add(&p->kobj, &port_type, -				   kobject_get(device->ports_parent), +				   device->ports_parent,  				   "%d", port_num); -	if (ret) -		goto err_put; +	if (ret) { +		kfree(p); +		return ret; +	}  	ret = sysfs_create_group(&p->kobj, &pma_group);  	if (ret) @@ -522,8 +551,10 @@ static int add_port(struct ib_device *device, int port_num,  	p->gid_group.name  = "gids";  	p->gid_group.attrs = 
alloc_group_attrs(show_port_gid, attr.gid_tbl_len); -	if (!p->gid_group.attrs) +	if (!p->gid_group.attrs) { +		ret = -ENOMEM;  		goto err_remove_pma; +	}  	ret = sysfs_create_group(&p->kobj, &p->gid_group);  	if (ret) @@ -532,8 +563,10 @@ static int add_port(struct ib_device *device, int port_num,  	p->pkey_group.name  = "pkeys";  	p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,  						attr.pkey_tbl_len); -	if (!p->pkey_group.attrs) +	if (!p->pkey_group.attrs) { +		ret = -ENOMEM;  		goto err_remove_gid; +	}  	ret = sysfs_create_group(&p->kobj, &p->pkey_group);  	if (ret) @@ -558,6 +591,7 @@ err_free_pkey:  		kfree(p->pkey_group.attrs[i]);  	kfree(p->pkey_group.attrs); +	p->pkey_group.attrs = NULL;  err_remove_gid:  	sysfs_remove_group(&p->kobj, &p->gid_group); @@ -567,13 +601,13 @@ err_free_gid:  		kfree(p->gid_group.attrs[i]);  	kfree(p->gid_group.attrs); +	p->gid_group.attrs = NULL;  err_remove_pma:  	sysfs_remove_group(&p->kobj, &pma_group);  err_put: -	kobject_put(device->ports_parent); -	kfree(p); +	kobject_put(&p->kobj);  	return ret;  } @@ -585,6 +619,8 @@ static ssize_t show_node_type(struct device *device,  	switch (dev->node_type) {  	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);  	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type); +	case RDMA_NODE_USNIC:	  return sprintf(buf, "%d: usNIC\n", dev->node_type); +	case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);  	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);  	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);  	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type); @@ -780,6 +816,22 @@ static struct attribute_group iw_stats_group = {  	.attrs	= iw_proto_stats_attrs,  }; +static void free_port_list_attributes(struct ib_device *device) +{ +	struct kobject *p, *t; + +	list_for_each_entry_safe(p, t, &device->port_list, entry) { +		struct ib_port *port = container_of(p, struct ib_port, kobj); +		list_del(&p->entry); +		sysfs_remove_group(p, &pma_group); +		sysfs_remove_group(p, &port->pkey_group); +		sysfs_remove_group(p, &port->gid_group); +		kobject_put(p); +	} + +	kobject_put(device->ports_parent); +} +  int ib_device_register_sysfs(struct ib_device *device,  			     int (*port_callback)(struct ib_device *,  						  u8, struct kobject *)) @@ -790,7 +842,7 @@ int ib_device_register_sysfs(struct ib_device *device,  	class_dev->class      = &ib_class;  	class_dev->parent     = device->dma_device; -	dev_set_name(class_dev, device->name); +	dev_set_name(class_dev, "%s", device->name);  	dev_set_drvdata(class_dev, device);  	INIT_LIST_HEAD(&device->port_list); @@ -806,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,  	}  	device->ports_parent = kobject_create_and_add("ports", -					kobject_get(&class_dev->kobj)); +						      &class_dev->kobj);  	if (!device->ports_parent) {  		ret = -ENOMEM;  		goto err_put; @@ -833,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,  	return 0;  err_put: -	{ -		struct kobject *p, *t; -		struct ib_port *port; - -		list_for_each_entry_safe(p, t, &device->port_list, entry) { -			list_del(&p->entry); -			port = container_of(p, struct ib_port, kobj); -			sysfs_remove_group(p, &pma_group); -			sysfs_remove_group(p, &port->pkey_group); -			sysfs_remove_group(p, &port->gid_group); -			kobject_put(p); -		} -	} - -	kobject_put(&class_dev->kobj); +	free_port_list_attributes(device);  err_unregister:  	
device_unregister(class_dev); @@ -858,22 +896,18 @@ err:  void ib_device_unregister_sysfs(struct ib_device *device)  { -	struct kobject *p, *t; -	struct ib_port *port; -  	/* Hold kobject until ib_dealloc_device() */ -	kobject_get(&device->dev.kobj); +	struct kobject *kobj_dev = kobject_get(&device->dev.kobj); +	int i; -	list_for_each_entry_safe(p, t, &device->port_list, entry) { -		list_del(&p->entry); -		port = container_of(p, struct ib_port, kobj); -		sysfs_remove_group(p, &pma_group); -		sysfs_remove_group(p, &port->pkey_group); -		sysfs_remove_group(p, &port->gid_group); -		kobject_put(p); -	} +	if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) +		sysfs_remove_group(kobj_dev, &iw_stats_group); + +	free_port_list_attributes(device); + +	for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) +		device_remove_file(&device->dev, ib_class_attributes[i]); -	kobject_put(device->ports_parent);  	device_unregister(&device->dev);  } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 08f948df8fa..f2f63933e8a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -106,9 +106,6 @@ enum {  	IB_UCM_MAX_DEVICES = 32  }; -/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ -extern struct class cm_class; -  #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)  static void ib_ucm_add_one(struct ib_device *device); @@ -179,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx)  static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)  {  	struct ib_ucm_context *ctx; -	int result;  	ctx = kzalloc(sizeof *ctx, GFP_KERNEL);  	if (!ctx) @@ -190,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)  	ctx->file = file;  	INIT_LIST_HEAD(&ctx->events); -	do { -		result = idr_pre_get(&ctx_id_table, GFP_KERNEL); -		if (!result) -			goto error; - -		mutex_lock(&ctx_id_mutex); -		result = idr_get_new(&ctx_id_table, ctx, &ctx->id); -		mutex_unlock(&ctx_id_mutex); -	} while (result == -EAGAIN); - -	if (result) +	mutex_lock(&ctx_id_mutex); +	ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); +	mutex_unlock(&ctx_id_mutex); +	if (ctx->id < 0)  		goto error;  	list_add_tail(&ctx->file_list, &file->ctxs); @@ -400,7 +389,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,  	struct ib_ucm_event_get cmd;  	struct ib_ucm_event *uevent;  	int result = 0; -	DEFINE_WAIT(wait);  	if (out_len < sizeof(struct ib_ucm_event_resp))  		return -ENOSPC; @@ -1122,7 +1110,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,  	if (copy_from_user(&hdr, buf, sizeof(hdr)))  		return -EFAULT; -	if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) +	if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))  		return -EINVAL;  	if (hdr.in + sizeof(hdr) > len) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index ca12acf3837..56a4b7ca7ee 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -41,11 +41,14 @@  #include <linux/miscdevice.h>  #include <linux/slab.h>  #include <linux/sysctl.h> +#include <linux/module.h>  #include <rdma/rdma_user_cm.h>  #include <rdma/ib_marshall.h>  #include <rdma/rdma_cm.h>  #include <rdma/rdma_cm_ib.h> +#include <rdma/ib_addr.h> +#include <rdma/ib.h>  MODULE_AUTHOR("Sean Hefty");  MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -54,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");  static unsigned int max_backlog = 1024;  static struct ctl_table_header 
*ucma_ctl_table_hdr; -static ctl_table ucma_ctl_table[] = { +static struct ctl_table ucma_ctl_table[] = {  	{  		.procname	= "max_backlog",  		.data		= &max_backlog, @@ -65,12 +68,6 @@ static ctl_table ucma_ctl_table[] = {  	{ }  }; -static struct ctl_path ucma_ctl_path[] = { -	{ .procname = "net" }, -	{ .procname = "rdma_ucm" }, -	{ } -}; -  struct ucma_file {  	struct mutex		mut;  	struct file		*filp; @@ -150,7 +147,6 @@ static void ucma_put_ctx(struct ucma_context *ctx)  static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)  {  	struct ucma_context *ctx; -	int ret;  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);  	if (!ctx) @@ -161,17 +157,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file)  	INIT_LIST_HEAD(&ctx->mc_list);  	ctx->file = file; -	do { -		ret = idr_pre_get(&ctx_idr, GFP_KERNEL); -		if (!ret) -			goto error; - -		mutex_lock(&mut); -		ret = idr_get_new(&ctx_idr, ctx, &ctx->id); -		mutex_unlock(&mut); -	} while (ret == -EAGAIN); - -	if (ret) +	mutex_lock(&mut); +	ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); +	mutex_unlock(&mut); +	if (ctx->id < 0)  		goto error;  	list_add_tail(&ctx->list, &file->ctx_list); @@ -185,23 +174,15 @@ error:  static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)  {  	struct ucma_multicast *mc; -	int ret;  	mc = kzalloc(sizeof(*mc), GFP_KERNEL);  	if (!mc)  		return NULL; -	do { -		ret = idr_pre_get(&multicast_idr, GFP_KERNEL); -		if (!ret) -			goto error; - -		mutex_lock(&mut); -		ret = idr_get_new(&multicast_idr, mc, &mc->id); -		mutex_unlock(&mut); -	} while (ret == -EAGAIN); - -	if (ret) +	mutex_lock(&mut); +	mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); +	mutex_unlock(&mut); +	if (mc->id < 0)  		goto error;  	mc->ctx = ctx; @@ -272,17 +253,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,  	if (!uevent)  		return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; +	mutex_lock(&ctx->file->mut);  	uevent->cm_id = cm_id;  	ucma_set_event_context(ctx, event, uevent);  	uevent->resp.event = event->event;  	uevent->resp.status = event->status; -	if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB) +	if (cm_id->qp_type == IB_QPT_UD)  		ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);  	else  		ucma_copy_conn_event(&uevent->resp.param.conn,  				     &event->param.conn); -	mutex_lock(&ctx->file->mut);  	if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {  		if (!ctx->backlog) {  			ret = -ENOMEM; @@ -290,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,  			goto out;  		}  		ctx->backlog--; -	} else if (!ctx->uid) { +	} else if (!ctx->uid || ctx->cm_id != cm_id) {  		/*  		 * We ignore events for new connections until userspace has set  		 * their context.  
This can only happen if an error occurs on a @@ -315,7 +296,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,  	struct rdma_ucm_get_event cmd;  	struct ucma_event *uevent;  	int ret = 0; -	DEFINE_WAIT(wait);  	if (out_len < sizeof uevent->resp)  		return -ENOSPC; @@ -367,13 +347,31 @@ done:  	return ret;  } -static ssize_t ucma_create_id(struct ucma_file *file, -				const char __user *inbuf, -				int in_len, int out_len) +static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) +{ +	switch (cmd->ps) { +	case RDMA_PS_TCP: +		*qp_type = IB_QPT_RC; +		return 0; +	case RDMA_PS_UDP: +	case RDMA_PS_IPOIB: +		*qp_type = IB_QPT_UD; +		return 0; +	case RDMA_PS_IB: +		*qp_type = cmd->qp_type; +		return 0; +	default: +		return -EINVAL; +	} +} + +static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, +			      int in_len, int out_len)  {  	struct rdma_ucm_create_id cmd;  	struct rdma_ucm_create_id_resp resp;  	struct ucma_context *ctx; +	enum ib_qp_type qp_type;  	int ret;  	if (out_len < sizeof(resp)) @@ -382,6 +380,10 @@ static ssize_t ucma_create_id(struct ucma_file *file,  	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))  		return -EFAULT; +	ret = ucma_get_qp_type(&cmd, &qp_type); +	if (ret) +		return ret; +  	mutex_lock(&file->mut);  	ctx = ucma_alloc_ctx(file);  	mutex_unlock(&file->mut); @@ -389,7 +391,7 @@ static ssize_t ucma_create_id(struct ucma_file *file,  		return -ENOMEM;  	ctx->uid = cmd.uid; -	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps); +	ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);  	if (IS_ERR(ctx->cm_id)) {  		ret = PTR_ERR(ctx->cm_id);  		goto err1; @@ -426,24 +428,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)  	mutex_unlock(&mut);  } -static void ucma_cleanup_events(struct ucma_context *ctx) -{ -	struct ucma_event *uevent, *tmp; - -	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { -		if (uevent->ctx != ctx) -			continue; - -		list_del(&uevent->list); - -		/* clear incoming connections. */ -		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) -			rdma_destroy_id(uevent->cm_id); - -		kfree(uevent); -	} -} -  static void ucma_cleanup_mc_events(struct ucma_multicast *mc)  {  	struct ucma_event *uevent, *tmp; @@ -457,9 +441,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)  	}  } +/* + * We cannot hold file->mut when calling rdma_destroy_id() or we can + * deadlock.  We also acquire file->mut in ucma_event_handler(), and + * rdma_destroy_id() will wait until all callbacks have completed. + */  static int ucma_free_ctx(struct ucma_context *ctx)  {  	int events_reported; +	struct ucma_event *uevent, *tmp; +	LIST_HEAD(list);  	/* No new events will be generated after destroying the id. */  	rdma_destroy_id(ctx->cm_id); @@ -468,10 +459,20 @@ static int ucma_free_ctx(struct ucma_context *ctx)  	/* Cleanup events not yet reported to the user. 
*/  	mutex_lock(&ctx->file->mut); -	ucma_cleanup_events(ctx); +	list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { +		if (uevent->ctx == ctx) +			list_move_tail(&uevent->list, &list); +	}  	list_del(&ctx->list);  	mutex_unlock(&ctx->file->mut); +	list_for_each_entry_safe(uevent, tmp, &list, list) { +		list_del(&uevent->list); +		if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) +			rdma_destroy_id(uevent->cm_id); +		kfree(uevent); +	} +  	events_reported = ctx->events_reported;  	kfree(ctx);  	return events_reported; @@ -511,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,  	return ret;  } -static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, +static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf,  			      int in_len, int out_len)  { -	struct rdma_ucm_bind_addr cmd; +	struct rdma_ucm_bind_ip cmd;  	struct ucma_context *ctx;  	int ret; @@ -530,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf,  	return ret;  } +static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, +			 int in_len, int out_len) +{ +	struct rdma_ucm_bind cmd; +	struct sockaddr *addr; +	struct ucma_context *ctx; +	int ret; + +	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) +		return -EFAULT; + +	addr = (struct sockaddr *) &cmd.addr; +	if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) +		return -EINVAL; + +	ctx = ucma_get_ctx(file, cmd.id); +	if (IS_ERR(ctx)) +		return PTR_ERR(ctx); + +	ret = rdma_bind_addr(ctx->cm_id, addr); +	ucma_put_ctx(ctx); +	return ret; +} + +static ssize_t ucma_resolve_ip(struct ucma_file *file, +			       const char __user *inbuf, +			       int in_len, int out_len) +{ +	struct rdma_ucm_resolve_ip cmd; +	struct ucma_context *ctx; +	int ret; + +	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) +		return -EFAULT; + +	ctx = ucma_get_ctx(file, cmd.id); +	if (IS_ERR(ctx)) +		return PTR_ERR(ctx); + +	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, +				(struct sockaddr *) &cmd.dst_addr, +				cmd.timeout_ms); +	ucma_put_ctx(ctx); +	return ret; +} +  static ssize_t ucma_resolve_addr(struct ucma_file *file,  				 const char __user *inbuf,  				 int in_len, int out_len)  {  	struct rdma_ucm_resolve_addr cmd; +	struct sockaddr *src, *dst;  	struct ucma_context *ctx;  	int ret;  	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))  		return -EFAULT; +	src = (struct sockaddr *) &cmd.src_addr; +	dst = (struct sockaddr *) &cmd.dst_addr; +	if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || +	    !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) +		return -EINVAL; +  	ctx = ucma_get_ctx(file, cmd.id);  	if (IS_ERR(ctx))  		return PTR_ERR(ctx); -	ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, -				(struct sockaddr *) &cmd.dst_addr, -				cmd.timeout_ms); +	ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms);  	ucma_put_ctx(ctx);  	return ret;  } @@ -603,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,  static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,  				 struct rdma_route *route)  { -	struct rdma_dev_addr *dev_addr; -	struct net_device *dev; -	u16 vid = 0;  	resp->num_paths = route->num_paths;  	switch (route->num_paths) {  	case 0: -		dev_addr = &route->addr.dev_addr; -		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); -			if (dev) { -				vid = 
rdma_vlan_dev_vlan_id(dev); -				dev_put(dev); -			} - -		iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, -				    dev_addr->dst_dev_addr, vid); -		iboe_addr_get_sgid(dev_addr, -				   (union ib_gid *) &resp->ib_route[0].sgid); +		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, +			    (union ib_gid *)&resp->ib_route[0].dgid); +		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, +			    (union ib_gid *)&resp->ib_route[0].sgid);  		resp->ib_route[0].pkey = cpu_to_be16(0xffff);  		break;  	case 2: @@ -636,11 +678,21 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,  	}  } +static void ucma_copy_iw_route(struct rdma_ucm_query_route_resp *resp, +			       struct rdma_route *route) +{ +	struct rdma_dev_addr *dev_addr; + +	dev_addr = &route->addr.dev_addr; +	rdma_addr_get_dgid(dev_addr, (union ib_gid *) &resp->ib_route[0].dgid); +	rdma_addr_get_sgid(dev_addr, (union ib_gid *) &resp->ib_route[0].sgid); +} +  static ssize_t ucma_query_route(struct ucma_file *file,  				const char __user *inbuf,  				int in_len, int out_len)  { -	struct rdma_ucm_query_route cmd; +	struct rdma_ucm_query cmd;  	struct rdma_ucm_query_route_resp resp;  	struct ucma_context *ctx;  	struct sockaddr *addr; @@ -670,8 +722,10 @@ static ssize_t ucma_query_route(struct ucma_file *file,  	resp.node_guid = (__force __u64) ctx->cm_id->device->node_guid;  	resp.port_num = ctx->cm_id->port_num; -	if (rdma_node_get_transport(ctx->cm_id->device->node_type) == RDMA_TRANSPORT_IB) { -		switch (rdma_port_get_link_layer(ctx->cm_id->device, ctx->cm_id->port_num)) { +	switch (rdma_node_get_transport(ctx->cm_id->device->node_type)) { +	case RDMA_TRANSPORT_IB: +		switch (rdma_port_get_link_layer(ctx->cm_id->device, +			ctx->cm_id->port_num)) {  		case IB_LINK_LAYER_INFINIBAND:  			ucma_copy_ib_route(&resp, &ctx->cm_id->route);  			break; @@ -681,6 +735,12 @@ static ssize_t ucma_query_route(struct ucma_file *file,  		default:  			break;  		} +		break; +	case RDMA_TRANSPORT_IWARP: +		ucma_copy_iw_route(&resp, &ctx->cm_id->route); +		break; +	default: +		break;  	}  out: @@ -692,7 +752,162 @@ out:  	return ret;  } -static void ucma_copy_conn_param(struct rdma_conn_param *dst, +static void ucma_query_device_addr(struct rdma_cm_id *cm_id, +				   struct rdma_ucm_query_addr_resp *resp) +{ +	if (!cm_id->device) +		return; + +	resp->node_guid = (__force __u64) cm_id->device->node_guid; +	resp->port_num = cm_id->port_num; +	resp->pkey = (__force __u16) cpu_to_be16( +		     ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); +} + +static ssize_t ucma_query_addr(struct ucma_context *ctx, +			       void __user *response, int out_len) +{ +	struct rdma_ucm_query_addr_resp resp; +	struct sockaddr *addr; +	int ret = 0; + +	if (out_len < sizeof(resp)) +		return -ENOSPC; + +	memset(&resp, 0, sizeof resp); + +	addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; +	resp.src_size = rdma_addr_size(addr); +	memcpy(&resp.src_addr, addr, resp.src_size); + +	addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; +	resp.dst_size = rdma_addr_size(addr); +	memcpy(&resp.dst_addr, addr, resp.dst_size); + +	ucma_query_device_addr(ctx->cm_id, &resp); + +	if (copy_to_user(response, &resp, sizeof(resp))) +		ret = -EFAULT; + +	return ret; +} + +static ssize_t ucma_query_path(struct ucma_context *ctx, +			       void __user *response, int out_len) +{ +	struct rdma_ucm_query_path_resp *resp; +	int i, ret = 0; + +	if (out_len < sizeof(*resp)) +		return -ENOSPC; + +	resp = kzalloc(out_len, GFP_KERNEL); +	if (!resp) +		
return -ENOMEM; + +	resp->num_paths = ctx->cm_id->route.num_paths; +	for (i = 0, out_len -= sizeof(*resp); +	     i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); +	     i++, out_len -= sizeof(struct ib_path_rec_data)) { + +		resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | +					   IB_PATH_BIDIRECTIONAL; +		ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], +				&resp->path_data[i].path_rec); +	} + +	if (copy_to_user(response, resp, +			 sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) +		ret = -EFAULT; + +	kfree(resp); +	return ret; +} + +static ssize_t ucma_query_gid(struct ucma_context *ctx, +			      void __user *response, int out_len) +{ +	struct rdma_ucm_query_addr_resp resp; +	struct sockaddr_ib *addr; +	int ret = 0; + +	if (out_len < sizeof(resp)) +		return -ENOSPC; + +	memset(&resp, 0, sizeof resp); + +	ucma_query_device_addr(ctx->cm_id, &resp); + +	addr = (struct sockaddr_ib *) &resp.src_addr; +	resp.src_size = sizeof(*addr); +	if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { +		memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); +	} else { +		addr->sib_family = AF_IB; +		addr->sib_pkey = (__force __be16) resp.pkey; +		rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, +				   (union ib_gid *) &addr->sib_addr); +		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) +						    &ctx->cm_id->route.addr.src_addr); +	} + +	addr = (struct sockaddr_ib *) &resp.dst_addr; +	resp.dst_size = sizeof(*addr); +	if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { +		memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); +	} else { +		addr->sib_family = AF_IB; +		addr->sib_pkey = (__force __be16) resp.pkey; +		rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, +				   (union ib_gid *) &addr->sib_addr); +		addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) +						    &ctx->cm_id->route.addr.dst_addr); +	} + +	if (copy_to_user(response, &resp, sizeof(resp))) +		ret = -EFAULT; + +	return ret; +} + +static ssize_t ucma_query(struct ucma_file *file, +			  const char __user *inbuf, +			  int in_len, int out_len) +{ +	struct rdma_ucm_query cmd; +	struct ucma_context *ctx; +	void __user *response; +	int ret; + +	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) +		return -EFAULT; + +	response = (void __user *)(unsigned long) cmd.response; +	ctx = ucma_get_ctx(file, cmd.id); +	if (IS_ERR(ctx)) +		return PTR_ERR(ctx); + +	switch (cmd.option) { +	case RDMA_USER_CM_QUERY_ADDR: +		ret = ucma_query_addr(ctx, response, out_len); +		break; +	case RDMA_USER_CM_QUERY_PATH: +		ret = ucma_query_path(ctx, response, out_len); +		break; +	case RDMA_USER_CM_QUERY_GID: +		ret = ucma_query_gid(ctx, response, out_len); +		break; +	default: +		ret = -ENOSYS; +		break; +	} + +	ucma_put_ctx(ctx); +	return ret; +} + +static void ucma_copy_conn_param(struct rdma_cm_id *id, +				 struct rdma_conn_param *dst,  				 struct rdma_ucm_conn_param *src)  {  	dst->private_data = src->private_data; @@ -704,6 +919,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst,  	dst->rnr_retry_count = src->rnr_retry_count;  	dst->srq = src->srq;  	dst->qp_num = src->qp_num; +	dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? 
src->qkey : 0;  }  static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, @@ -724,7 +940,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf,  	if (IS_ERR(ctx))  		return PTR_ERR(ctx); -	ucma_copy_conn_param(&conn_param, &cmd.conn_param); +	ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);  	ret = rdma_connect(ctx->cm_id, &conn_param);  	ucma_put_ctx(ctx);  	return ret; @@ -767,9 +983,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,  		return PTR_ERR(ctx);  	if (cmd.conn_param.valid) { -		ctx->uid = cmd.uid; -		ucma_copy_conn_param(&conn_param, &cmd.conn_param); +		ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); +		mutex_lock(&file->mut);  		ret = rdma_accept(ctx->cm_id, &conn_param); +		if (!ret) +			ctx->uid = cmd.uid; +		mutex_unlock(&file->mut);  	} else  		ret = rdma_accept(ctx->cm_id, NULL); @@ -865,6 +1084,20 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,  		}  		rdma_set_service_type(ctx->cm_id, *((u8 *) optval));  		break; +	case RDMA_OPTION_ID_REUSEADDR: +		if (optlen != sizeof(int)) { +			ret = -EINVAL; +			break; +		} +		ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); +		break; +	case RDMA_OPTION_ID_AFONLY: +		if (optlen != sizeof(int)) { +			ret = -EINVAL; +			break; +		} +		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); +		break;  	default:  		ret = -ENOSYS;  	} @@ -951,23 +1184,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,  	if (IS_ERR(ctx))  		return PTR_ERR(ctx); -	optval = kmalloc(cmd.optlen, GFP_KERNEL); -	if (!optval) { -		ret = -ENOMEM; -		goto out1; -	} - -	if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, -			   cmd.optlen)) { -		ret = -EFAULT; -		goto out2; +	optval = memdup_user((void __user *) (unsigned long) cmd.optval, +			     cmd.optlen); +	if (IS_ERR(optval)) { +		ret = PTR_ERR(optval); +		goto out;  	}  	ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,  				    cmd.optlen); -out2:  	kfree(optval); -out1: + +out:  	ucma_put_ctx(ctx);  	return ret;  } @@ -991,23 +1219,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf,  	return ret;  } -static ssize_t ucma_join_multicast(struct ucma_file *file, -				   const char __user *inbuf, -				   int in_len, int out_len) +static ssize_t ucma_process_join(struct ucma_file *file, +				 struct rdma_ucm_join_mcast *cmd,  int out_len)  { -	struct rdma_ucm_join_mcast cmd;  	struct rdma_ucm_create_id_resp resp;  	struct ucma_context *ctx;  	struct ucma_multicast *mc; +	struct sockaddr *addr;  	int ret;  	if (out_len < sizeof(resp))  		return -ENOSPC; -	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) -		return -EFAULT; +	addr = (struct sockaddr *) &cmd->addr; +	if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) +		return -EINVAL; -	ctx = ucma_get_ctx(file, cmd.id); +	ctx = ucma_get_ctx(file, cmd->id);  	if (IS_ERR(ctx))  		return PTR_ERR(ctx); @@ -1018,14 +1246,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file,  		goto err1;  	} -	mc->uid = cmd.uid; -	memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); +	mc->uid = cmd->uid; +	memcpy(&mc->addr, addr, cmd->addr_size);  	ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc);  	if (ret)  		goto err2;  	resp.id = mc->id; -	if (copy_to_user((void __user *)(unsigned long)cmd.response, +	if (copy_to_user((void __user *)(unsigned long) cmd->response,  			
 &resp, sizeof(resp))) {  		ret = -EFAULT;  		goto err3; @@ -1050,6 +1278,38 @@ err1:  	return ret;  } +static ssize_t ucma_join_ip_multicast(struct ucma_file *file, +				      const char __user *inbuf, +				      int in_len, int out_len) +{ +	struct rdma_ucm_join_ip_mcast cmd; +	struct rdma_ucm_join_mcast join_cmd; + +	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) +		return -EFAULT; + +	join_cmd.response = cmd.response; +	join_cmd.uid = cmd.uid; +	join_cmd.id = cmd.id; +	join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); +	join_cmd.reserved = 0; +	memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); + +	return ucma_process_join(file, &join_cmd, out_len); +} + +static ssize_t ucma_join_multicast(struct ucma_file *file, +				   const char __user *inbuf, +				   int in_len, int out_len) +{ +	struct rdma_ucm_join_mcast cmd; + +	if (copy_from_user(&cmd, inbuf, sizeof(cmd))) +		return -EFAULT; + +	return ucma_process_join(file, &cmd, out_len); +} +  static ssize_t ucma_leave_multicast(struct ucma_file *file,  				    const char __user *inbuf,  				    int in_len, int out_len) @@ -1138,7 +1398,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,  	struct rdma_ucm_migrate_id cmd;  	struct rdma_ucm_migrate_resp resp;  	struct ucma_context *ctx; -	struct file *filp; +	struct fd f;  	struct ucma_file *cur_file;  	int ret = 0; @@ -1146,12 +1406,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,  		return -EFAULT;  	/* Get current fd to protect against it being closed */ -	filp = fget(cmd.fd); -	if (!filp) +	f = fdget(cmd.fd); +	if (!f.file)  		return -ENOENT;  	/* Validate current fd and prevent destruction of id. */ -	ctx = ucma_get_ctx(filp->private_data, cmd.id); +	ctx = ucma_get_ctx(f.file->private_data, cmd.id);  	if (IS_ERR(ctx)) {  		ret = PTR_ERR(ctx);  		goto file_put; @@ -1185,32 +1445,36 @@ response:  	ucma_put_ctx(ctx);  file_put: -	fput(filp); +	fdput(f);  	return ret;  }  static ssize_t (*ucma_cmd_table[])(struct ucma_file *file,  				   const char __user *inbuf,  				   int in_len, int out_len) = { -	[RDMA_USER_CM_CMD_CREATE_ID]	= ucma_create_id, -	[RDMA_USER_CM_CMD_DESTROY_ID]	= ucma_destroy_id, -	[RDMA_USER_CM_CMD_BIND_ADDR]	= ucma_bind_addr, -	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	= ucma_resolve_addr, -	[RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, -	[RDMA_USER_CM_CMD_QUERY_ROUTE]	= ucma_query_route, -	[RDMA_USER_CM_CMD_CONNECT]	= ucma_connect, -	[RDMA_USER_CM_CMD_LISTEN]	= ucma_listen, -	[RDMA_USER_CM_CMD_ACCEPT]	= ucma_accept, -	[RDMA_USER_CM_CMD_REJECT]	= ucma_reject, -	[RDMA_USER_CM_CMD_DISCONNECT]	= ucma_disconnect, -	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	= ucma_init_qp_attr, -	[RDMA_USER_CM_CMD_GET_EVENT]	= ucma_get_event, -	[RDMA_USER_CM_CMD_GET_OPTION]	= NULL, -	[RDMA_USER_CM_CMD_SET_OPTION]	= ucma_set_option, -	[RDMA_USER_CM_CMD_NOTIFY]	= ucma_notify, -	[RDMA_USER_CM_CMD_JOIN_MCAST]	= ucma_join_multicast, -	[RDMA_USER_CM_CMD_LEAVE_MCAST]	= ucma_leave_multicast, -	[RDMA_USER_CM_CMD_MIGRATE_ID]	= ucma_migrate_id +	[RDMA_USER_CM_CMD_CREATE_ID] 	 = ucma_create_id, +	[RDMA_USER_CM_CMD_DESTROY_ID]	 = ucma_destroy_id, +	[RDMA_USER_CM_CMD_BIND_IP]	 = ucma_bind_ip, +	[RDMA_USER_CM_CMD_RESOLVE_IP]	 = ucma_resolve_ip, +	[RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, +	[RDMA_USER_CM_CMD_QUERY_ROUTE]	 = ucma_query_route, +	[RDMA_USER_CM_CMD_CONNECT]	 = ucma_connect, +	[RDMA_USER_CM_CMD_LISTEN]	 = ucma_listen, +	[RDMA_USER_CM_CMD_ACCEPT]	 = ucma_accept, +	[RDMA_USER_CM_CMD_REJECT]	 = ucma_reject, +	[RDMA_USER_CM_CMD_DISCONNECT]	 = 
ucma_disconnect, +	[RDMA_USER_CM_CMD_INIT_QP_ATTR]	 = ucma_init_qp_attr, +	[RDMA_USER_CM_CMD_GET_EVENT]	 = ucma_get_event, +	[RDMA_USER_CM_CMD_GET_OPTION]	 = NULL, +	[RDMA_USER_CM_CMD_SET_OPTION]	 = ucma_set_option, +	[RDMA_USER_CM_CMD_NOTIFY]	 = ucma_notify, +	[RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, +	[RDMA_USER_CM_CMD_LEAVE_MCAST]	 = ucma_leave_multicast, +	[RDMA_USER_CM_CMD_MIGRATE_ID]	 = ucma_migrate_id, +	[RDMA_USER_CM_CMD_QUERY]	 = ucma_query, +	[RDMA_USER_CM_CMD_BIND]		 = ucma_bind, +	[RDMA_USER_CM_CMD_RESOLVE_ADDR]	 = ucma_resolve_addr, +	[RDMA_USER_CM_CMD_JOIN_MCAST]	 = ucma_join_multicast  };  static ssize_t ucma_write(struct file *filp, const char __user *buf, @@ -1226,7 +1490,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,  	if (copy_from_user(&hdr, buf, sizeof(hdr)))  		return -EFAULT; -	if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) +	if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))  		return -EINVAL;  	if (hdr.in + sizeof(hdr) > len) @@ -1313,9 +1577,11 @@ static const struct file_operations ucma_fops = {  };  static struct miscdevice ucma_misc = { -	.minor	= MISC_DYNAMIC_MINOR, -	.name	= "rdma_cm", -	.fops	= &ucma_fops, +	.minor		= MISC_DYNAMIC_MINOR, +	.name		= "rdma_cm", +	.nodename	= "infiniband/rdma_cm", +	.mode		= 0666, +	.fops		= &ucma_fops,  };  static ssize_t show_abi_version(struct device *dev, @@ -1340,7 +1606,7 @@ static int __init ucma_init(void)  		goto err1;  	} -	ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table); +	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);  	if (!ucma_ctl_table_hdr) {  		printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");  		ret = -ENOMEM; @@ -1356,7 +1622,7 @@ err1:  static void __exit ucma_cleanup(void)  { -	unregister_sysctl_table(ucma_ctl_table_hdr); +	unregister_net_sysctl_table(ucma_ctl_table_hdr);  	device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);  	misc_deregister(&ucma_misc);  	idr_destroy(&ctx_idr); diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index bb7e1928082..72feee620eb 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -33,6 +33,7 @@  #include <linux/errno.h>  #include <linux/string.h> +#include <linux/export.h>  #include <linux/if_ether.h>  #include <rdma/ib_pack.h> @@ -278,36 +279,6 @@ void ib_ud_header_init(int     		    payload_bytes,  EXPORT_SYMBOL(ib_ud_header_init);  /** - * ib_lrh_header_pack - Pack LRH header struct into wire format - * @lrh:unpacked LRH header struct - * @buf:Buffer to pack into - * - * ib_lrh_header_pack() packs the LRH header structure @lrh into - * wire format in the buffer @buf. - */ -int ib_lrh_header_pack(struct ib_unpacked_lrh *lrh, void *buf) -{ -	ib_pack(lrh_table, ARRAY_SIZE(lrh_table), lrh, buf); -	return 0; -} -EXPORT_SYMBOL(ib_lrh_header_pack); - -/** - * ib_lrh_header_unpack - Unpack LRH structure from wire format - * @lrh:unpacked LRH header struct - * @buf:Buffer to pack into - * - * ib_lrh_header_unpack() unpacks the LRH header structure from - * wire format (in buf) into @lrh. 
- */ -int ib_lrh_header_unpack(void *buf, struct ib_unpacked_lrh *lrh) -{ -	ib_unpack(lrh_table, ARRAY_SIZE(lrh_table), buf, lrh); -	return 0; -} -EXPORT_SYMBOL(ib_lrh_header_unpack); - -/**   * ib_ud_header_pack - Pack UD header struct into wire format   * @header:UD header struct   * @buf:Buffer to pack into diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 415e186eee3..a3a2e9c1639 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -35,35 +35,36 @@  #include <linux/mm.h>  #include <linux/dma-mapping.h>  #include <linux/sched.h> +#include <linux/export.h>  #include <linux/hugetlb.h>  #include <linux/dma-attrs.h>  #include <linux/slab.h>  #include "uverbs.h" -#define IB_UMEM_MAX_PAGE_CHUNK						\ -	((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /	\ -	 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -	\ -	  (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)  { -	struct ib_umem_chunk *chunk, *tmp; +	struct scatterlist *sg; +	struct page *page;  	int i; -	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { -		ib_dma_unmap_sg(dev, chunk->page_list, -				chunk->nents, DMA_BIDIRECTIONAL); -		for (i = 0; i < chunk->nents; ++i) { -			struct page *page = sg_page(&chunk->page_list[i]); +	if (umem->nmap > 0) +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, +				umem->nmap, +				DMA_BIDIRECTIONAL); -			if (umem->writable && dirty) -				set_page_dirty_lock(page); -			put_page(page); -		} +	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { -		kfree(chunk); +		page = sg_page(sg); +		if (umem->writable && dirty) +			set_page_dirty_lock(page); +		put_page(page);  	} + +	sg_free_table(&umem->sg_head); +	return; +  }  /** @@ -80,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	struct ib_umem *umem;  	struct page **page_list;  	struct vm_area_struct **vma_list; -	struct ib_umem_chunk *chunk;  	unsigned long locked;  	unsigned long lock_limit;  	unsigned long cur_base;  	unsigned long npages;  	int ret; -	int off;  	int i;  	DEFINE_DMA_ATTRS(attrs); +	struct scatterlist *sg, *sg_list_start; +	int need_release = 0;  	if (dmasync)  		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -96,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	if (!can_do_mlock())  		return ERR_PTR(-EPERM); -	umem = kmalloc(sizeof *umem, GFP_KERNEL); +	umem = kzalloc(sizeof *umem, GFP_KERNEL);  	if (!umem)  		return ERR_PTR(-ENOMEM); @@ -116,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	/* We assume the memory is from hugetlb until proved otherwise */  	umem->hugetlb   = 1; -	INIT_LIST_HEAD(&umem->chunk_list); -  	page_list = (struct page **) __get_free_page(GFP_KERNEL);  	if (!page_list) {  		kfree(umem); @@ -136,7 +135,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	down_write(&current->mm->mmap_sem); -	locked     = npages + current->mm->locked_vm; +	locked     = npages + current->mm->pinned_vm;  	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;  	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { @@ -146,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	cur_base = addr & PAGE_MASK; -	ret = 0; +	if (npages == 0) { +		ret = -EINVAL; +		goto out; +	} + +	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); +	if (ret) +		goto out; + +	need_release = 1; 
+	sg_list_start = umem->sg_head.sgl; +  	while (npages) {  		ret = get_user_pages(current, current->mm, cur_base,  				     min_t(unsigned long, npages, @@ -156,57 +166,41 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  		if (ret < 0)  			goto out; +		umem->npages += ret;  		cur_base += ret * PAGE_SIZE;  		npages   -= ret; -		off = 0; - -		while (ret) { -			chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * -					min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), -					GFP_KERNEL); -			if (!chunk) { -				ret = -ENOMEM; -				goto out; -			} - -			chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); -			sg_init_table(chunk->page_list, chunk->nents); -			for (i = 0; i < chunk->nents; ++i) { -				if (vma_list && -				    !is_vm_hugetlb_page(vma_list[i + off])) -					umem->hugetlb = 0; -				sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); -			} - -			chunk->nmap = ib_dma_map_sg_attrs(context->device, -							  &chunk->page_list[0], -							  chunk->nents, -							  DMA_BIDIRECTIONAL, -							  &attrs); -			if (chunk->nmap <= 0) { -				for (i = 0; i < chunk->nents; ++i) -					put_page(sg_page(&chunk->page_list[i])); -				kfree(chunk); - -				ret = -ENOMEM; -				goto out; -			} - -			ret -= chunk->nents; -			off += chunk->nents; -			list_add_tail(&chunk->list, &umem->chunk_list); +		for_each_sg(sg_list_start, sg, ret, i) { +			if (vma_list && !is_vm_hugetlb_page(vma_list[i])) +				umem->hugetlb = 0; + +			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);  		} -		ret = 0; +		/* preparing for next loop */ +		sg_list_start = sg;  	} +	umem->nmap = ib_dma_map_sg_attrs(context->device, +				  umem->sg_head.sgl, +				  umem->npages, +				  DMA_BIDIRECTIONAL, +				  &attrs); + +	if (umem->nmap <= 0) { +		ret = -ENOMEM; +		goto out; +	} + +	ret = 0; +  out:  	if (ret < 0) { -		__ib_umem_release(context->device, umem, 0); +		if (need_release) +			__ib_umem_release(context->device, umem, 0);  		kfree(umem);  	} else -		current->mm->locked_vm = locked; +		current->mm->pinned_vm = locked;  	up_write(&current->mm->mmap_sem);  	if (vma_list) @@ -222,7 +216,7 @@ static void ib_umem_account(struct work_struct *work)  	struct ib_umem *umem = container_of(work, struct ib_umem, work);  	down_write(&umem->mm->mmap_sem); -	umem->mm->locked_vm -= umem->diff; +	umem->mm->pinned_vm -= umem->diff;  	up_write(&umem->mm->mmap_sem);  	mmput(umem->mm);  	kfree(umem); @@ -262,13 +256,13 @@ void ib_umem_release(struct ib_umem *umem)  			umem->mm   = mm;  			umem->diff = diff; -			schedule_work(&umem->work); +			queue_work(ib_wq, &umem->work);  			return;  		}  	} else  		down_write(&mm->mmap_sem); -	current->mm->locked_vm -= diff; +	current->mm->pinned_vm -= diff;  	up_write(&mm->mmap_sem);  	mmput(mm);  	kfree(umem); @@ -277,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);  int ib_umem_page_count(struct ib_umem *umem)  { -	struct ib_umem_chunk *chunk;  	int shift;  	int i;  	int n; +	struct scatterlist *sg;  	shift = ilog2(umem->page_size);  	n = 0; -	list_for_each_entry(chunk, &umem->chunk_list, list) -		for (i = 0; i < chunk->nmap; ++i) -			n += sg_dma_len(&chunk->page_list[i]) >> shift; +	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) +		n += sg_dma_len(sg) >> shift;  	return n;  } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index cd1996d0ad0..1acb9910055 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -98,7 +98,7 @@ struct ib_umad_port {  struct ib_umad_device {  	int                  
start_port, end_port; -	struct kref          ref; +	struct kobject       kobj;  	struct ib_umad_port  port[0];  }; @@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);  static void ib_umad_add_one(struct ib_device *device);  static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj)  {  	struct ib_umad_device *dev = -		container_of(ref, struct ib_umad_device, ref); +		container_of(kobj, struct ib_umad_device, kobj);  	kfree(dev);  } +static struct kobj_type ib_umad_dev_ktype = { +	.release = ib_umad_release_dev, +}; +  static int hdr_size(struct ib_umad_file *file)  {  	return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : @@ -458,8 +462,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,  		goto err;  	} -	if (packet->mad.hdr.id < 0 || -	    packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { +	if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {  		ret = -EINVAL;  		goto err;  	} @@ -703,7 +706,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)  	mutex_lock(&file->port->file_mutex);  	mutex_lock(&file->mutex); -	if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { +	if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {  		ret = -EINVAL;  		goto out;  	} @@ -781,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)  {  	struct ib_umad_port *port;  	struct ib_umad_file *file; -	int ret; +	int ret = -ENXIO;  	port = container_of(inode->i_cdev, struct ib_umad_port, cdev); -	if (port) -		kref_get(&port->umad_dev->ref); -	else -		return -ENXIO;  	mutex_lock(&port->file_mutex); -	if (!port->ib_dev) { -		ret = -ENXIO; +	if (!port->ib_dev)  		goto out; -	} +	ret = -ENOMEM;  	file = kzalloc(sizeof *file, GFP_KERNEL); -	if (!file) { -		kref_put(&port->umad_dev->ref, ib_umad_release_dev); -		ret = -ENOMEM; +	if (!file)  		goto out; -	}  	mutex_init(&file->mutex);  	spin_lock_init(&file->send_lock); @@ -815,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)  	list_add_tail(&file->port_list, &port->file_list);  	ret = nonseekable_open(inode, filp); +	if (ret) { +		list_del(&file->port_list); +		kfree(file); +		goto out; +	} + +	kobject_get(&port->umad_dev->kobj);  out:  	mutex_unlock(&port->file_mutex); @@ -853,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)  	mutex_unlock(&file->port->file_mutex);  	kfree(file); -	kref_put(&dev->ref, ib_umad_release_dev); +	kobject_put(&dev->kobj);  	return 0;  } @@ -881,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)  	int ret;  	port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); -	if (port) -		kref_get(&port->umad_dev->ref); -	else -		return -ENXIO;  	if (filp->f_flags & O_NONBLOCK) {  		if (down_trylock(&port->sm_sem)) { @@ -899,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)  	}  	ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); -	if (ret) { -		up(&port->sm_sem); -		goto fail; -	} +	if (ret) +		goto err_up_sem;  	filp->private_data = port; -	return nonseekable_open(inode, filp); +	ret = nonseekable_open(inode, filp); +	if (ret) +		goto err_clr_sm_cap; + +	kobject_get(&port->umad_dev->kobj); + +	return 0; + +err_clr_sm_cap: +	swap(props.set_port_cap_mask, props.clr_port_cap_mask); +	ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: +	up(&port->sm_sem);  fail: -	
kref_put(&port->umad_dev->ref, ib_umad_release_dev);  	return ret;  } @@ -928,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)  	up(&port->sm_sem); -	kref_put(&port->umad_dev->ref, ib_umad_release_dev); +	kobject_put(&port->umad_dev->kobj);  	return ret;  } @@ -996,6 +1004,7 @@ static int find_overflow_devnum(void)  }  static int ib_umad_init_port(struct ib_device *device, int port_num, +			     struct ib_umad_device *umad_dev,  			     struct ib_umad_port *port)  {  	int devnum; @@ -1028,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,  	cdev_init(&port->cdev, &umad_fops);  	port->cdev.owner = THIS_MODULE; +	port->cdev.kobj.parent = &umad_dev->kobj;  	kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);  	if (cdev_add(&port->cdev, base, 1))  		goto err_cdev; @@ -1046,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,  	base += IB_UMAD_MAX_PORTS;  	cdev_init(&port->sm_cdev, &umad_sm_fops);  	port->sm_cdev.owner = THIS_MODULE; +	port->sm_cdev.kobj.parent = &umad_dev->kobj;  	kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);  	if (cdev_add(&port->sm_cdev, base, 1))  		goto err_sm_cdev; @@ -1139,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)  	if (!umad_dev)  		return; -	kref_init(&umad_dev->ref); +	kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);  	umad_dev->start_port = s;  	umad_dev->end_port   = e; @@ -1147,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)  	for (i = s; i <= e; ++i) {  		umad_dev->port[i - s].umad_dev = umad_dev; -		if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) +		if (ib_umad_init_port(device, i, umad_dev, +				      &umad_dev->port[i - s]))  			goto err;  	} @@ -1159,7 +1171,7 @@ err:  	while (--i >= s)  		ib_umad_kill_port(&umad_dev->port[i - s]); -	kref_put(&umad_dev->ref, ib_umad_release_dev); +	kobject_put(&umad_dev->kobj);  }  static void ib_umad_remove_one(struct ib_device *device) @@ -1173,7 +1185,12 @@ static void ib_umad_remove_one(struct ib_device *device)  	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)  		ib_umad_kill_port(&umad_dev->port[i]); -	kref_put(&umad_dev->ref, ib_umad_release_dev); +	kobject_put(&umad_dev->kobj); +} + +static char *umad_devnode(struct device *dev, umode_t *mode) +{ +	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));  }  static int __init ib_umad_init(void) @@ -1194,6 +1211,8 @@ static int __init ib_umad_init(void)  		goto out_chrdev;  	} +	umad_class->devnode = umad_devnode; +  	ret = class_create_file(umad_class, &class_attr_abi_version.attr);  	if (ret) {  		printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a078e5624d2..a283274a5a0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -47,6 +47,22 @@  #include <rdma/ib_umem.h>  #include <rdma/ib_user_verbs.h> +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\ +	do {								\ +		(udata)->inbuf  = (const void __user *) (ibuf);		\ +		(udata)->outbuf = (void __user *) (obuf);		\ +		(udata)->inlen  = (ilen);				\ +		(udata)->outlen = (olen);				\ +	} while (0) + +#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen)			\ +	do {									\ +		(udata)->inbuf  = (ilen) ? (const void __user *) (ibuf) : NULL;	\ +		(udata)->outbuf = (olen) ? 
(void __user *) (obuf) : NULL;	\ +		(udata)->inlen  = (ilen);					\ +		(udata)->outlen = (olen);					\ +	} while (0) +  /*   * Our lifetime rules for these structs are the following:   * @@ -76,6 +92,8 @@ struct ib_uverbs_device {  	struct ib_device		       *ib_dev;  	int					devnum;  	struct cdev			        cdev; +	struct rb_root				xrcd_tree; +	struct mutex				xrcd_tree_mutex;  };  struct ib_uverbs_event_file { @@ -120,9 +138,20 @@ struct ib_uevent_object {  	u32			events_reported;  }; +struct ib_uxrcd_object { +	struct ib_uobject	uobject; +	atomic_t		refcnt; +}; + +struct ib_usrq_object { +	struct ib_uevent_object	uevent; +	struct ib_uxrcd_object *uxrcd; +}; +  struct ib_uqp_object {  	struct ib_uevent_object	uevent;  	struct list_head 	mcast_list; +	struct ib_uxrcd_object *uxrcd;  };  struct ib_ucq_object { @@ -142,6 +171,8 @@ extern struct idr ib_uverbs_ah_idr;  extern struct idr ib_uverbs_cq_idr;  extern struct idr ib_uverbs_qp_idr;  extern struct idr ib_uverbs_srq_idr; +extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr;  void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -161,6 +192,23 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);  void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);  void ib_uverbs_event_handler(struct ib_event_handler *handler,  			     struct ib_event *event); +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); + +struct ib_uverbs_flow_spec { +	union { +		union { +			struct ib_uverbs_flow_spec_hdr hdr; +			struct { +				__u32 type; +				__u16 size; +				__u16 reserved; +			}; +		}; +		struct ib_uverbs_flow_spec_eth     eth; +		struct ib_uverbs_flow_spec_ipv4    ipv4; +		struct ib_uverbs_flow_spec_tcp_udp tcp_udp; +	}; +};  #define IB_UVERBS_DECLARE_CMD(name)					\  	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\ @@ -174,6 +222,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd);  IB_UVERBS_DECLARE_CMD(dealloc_pd);  IB_UVERBS_DECLARE_CMD(reg_mr);  IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(alloc_mw); +IB_UVERBS_DECLARE_CMD(dealloc_mw);  IB_UVERBS_DECLARE_CMD(create_comp_channel);  IB_UVERBS_DECLARE_CMD(create_cq);  IB_UVERBS_DECLARE_CMD(resize_cq); @@ -181,6 +231,7 @@ IB_UVERBS_DECLARE_CMD(poll_cq);  IB_UVERBS_DECLARE_CMD(req_notify_cq);  IB_UVERBS_DECLARE_CMD(destroy_cq);  IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(open_qp);  IB_UVERBS_DECLARE_CMD(query_qp);  IB_UVERBS_DECLARE_CMD(modify_qp);  IB_UVERBS_DECLARE_CMD(destroy_qp); @@ -195,5 +246,16 @@ IB_UVERBS_DECLARE_CMD(create_srq);  IB_UVERBS_DECLARE_CMD(modify_srq);  IB_UVERBS_DECLARE_CMD(query_srq);  IB_UVERBS_DECLARE_CMD(destroy_srq); +IB_UVERBS_DECLARE_CMD(create_xsrq); +IB_UVERBS_DECLARE_CMD(open_xrcd); +IB_UVERBS_DECLARE_CMD(close_xrcd); + +#define IB_UVERBS_DECLARE_EX_CMD(name)				\ +	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\ +				struct ib_udata *ucore,		\ +				struct ib_udata *uhw) + +IB_UVERBS_DECLARE_EX_CMD(create_flow); +IB_UVERBS_DECLARE_EX_CMD(destroy_flow);  #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b342248aec0..ea6203ee7bc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -40,21 +40,22 @@  #include <asm/uaccess.h>  #include "uverbs.h" - -static struct lock_class_key pd_lock_key; -static struct lock_class_key mr_lock_key; -static struct lock_class_key cq_lock_key; -static struct lock_class_key qp_lock_key; -static 
struct lock_class_key ah_lock_key; -static struct lock_class_key srq_lock_key; - -#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\ -	do {								\ -		(udata)->inbuf  = (void __user *) (ibuf);		\ -		(udata)->outbuf = (void __user *) (obuf);		\ -		(udata)->inlen  = (ilen);				\ -		(udata)->outlen = (olen);				\ -	} while (0) +#include "core_priv.h" + +struct uverbs_lock_class { +	struct lock_class_key	key; +	char			name[16]; +}; + +static struct uverbs_lock_class pd_lock_class	= { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class	= { .name = "MR-uobj" }; +static struct uverbs_lock_class mw_lock_class	= { .name = "MW-uobj" }; +static struct uverbs_lock_class cq_lock_class	= { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class	= { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class	= { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class	= { .name = "SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" };  /*   * The ib_uobject locking scheme is as follows: @@ -82,13 +83,13 @@ static struct lock_class_key srq_lock_key;   */  static void init_uobj(struct ib_uobject *uobj, u64 user_handle, -		      struct ib_ucontext *context, struct lock_class_key *key) +		      struct ib_ucontext *context, struct uverbs_lock_class *c)  {  	uobj->user_handle = user_handle;  	uobj->context     = context;  	kref_init(&uobj->ref);  	init_rwsem(&uobj->mutex); -	lockdep_set_class(&uobj->mutex, key); +	lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name);  	uobj->live        = 0;  } @@ -118,18 +119,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj)  {  	int ret; -retry: -	if (!idr_pre_get(idr, GFP_KERNEL)) -		return -ENOMEM; - +	idr_preload(GFP_KERNEL);  	spin_lock(&ib_uverbs_idr_lock); -	ret = idr_get_new(idr, uobj, &uobj->id); -	spin_unlock(&ib_uverbs_idr_lock); -	if (ret == -EAGAIN) -		goto retry; +	ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); +	if (ret >= 0) +		uobj->id = ret; -	return ret; +	spin_unlock(&ib_uverbs_idr_lock); +	idr_preload_end(); + +	return ret < 0 ? ret : 0;  }  void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) @@ -240,11 +240,24 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)  	return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);  } +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +{ +	struct ib_uobject *uobj; + +	uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); +	return uobj ? uobj->object : NULL; +} +  static void put_qp_read(struct ib_qp *qp)  {  	put_uobj_read(qp->uobject);  } +static void put_qp_write(struct ib_qp *qp) +{ +	put_uobj_write(qp->uobject); +} +  static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)  {  	return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); @@ -255,6 +268,18 @@ static void put_srq_read(struct ib_srq *srq)  	put_uobj_read(srq->uobject);  } +static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context, +				     struct ib_uobject **uobj) +{ +	*uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0); +	return *uobj ? 
(*uobj)->object : NULL; +} + +static void put_xrcd_read(struct ib_uobject *uobj) +{ +	put_uobj_read(uobj); +} +  ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,  			      const char __user *buf,  			      int in_len, int out_len) @@ -298,11 +323,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,  	INIT_LIST_HEAD(&ucontext->qp_list);  	INIT_LIST_HEAD(&ucontext->srq_list);  	INIT_LIST_HEAD(&ucontext->ah_list); +	INIT_LIST_HEAD(&ucontext->xrcd_list); +	INIT_LIST_HEAD(&ucontext->rule_list);  	ucontext->closing = 0;  	resp.num_comp_vectors = file->device->num_comp_vectors; -	ret = get_unused_fd(); +	ret = get_unused_fd_flags(O_CLOEXEC);  	if (ret < 0)  		goto err_free;  	resp.async_fd = ret; @@ -495,7 +522,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,  	if (!uobj)  		return -ENOMEM; -	init_uobj(uobj, 0, file->ucontext, &pd_lock_key); +	init_uobj(uobj, 0, file->ucontext, &pd_lock_class);  	down_write(&uobj->mutex);  	pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -579,6 +606,305 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,  	return in_len;  } +struct xrcd_table_entry { +	struct rb_node  node; +	struct ib_xrcd *xrcd; +	struct inode   *inode; +}; + +static int xrcd_table_insert(struct ib_uverbs_device *dev, +			    struct inode *inode, +			    struct ib_xrcd *xrcd) +{ +	struct xrcd_table_entry *entry, *scan; +	struct rb_node **p = &dev->xrcd_tree.rb_node; +	struct rb_node *parent = NULL; + +	entry = kmalloc(sizeof *entry, GFP_KERNEL); +	if (!entry) +		return -ENOMEM; + +	entry->xrcd  = xrcd; +	entry->inode = inode; + +	while (*p) { +		parent = *p; +		scan = rb_entry(parent, struct xrcd_table_entry, node); + +		if (inode < scan->inode) { +			p = &(*p)->rb_left; +		} else if (inode > scan->inode) { +			p = &(*p)->rb_right; +		} else { +			kfree(entry); +			return -EEXIST; +		} +	} + +	rb_link_node(&entry->node, parent, p); +	rb_insert_color(&entry->node, &dev->xrcd_tree); +	igrab(inode); +	return 0; +} + +static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev, +						  struct inode *inode) +{ +	struct xrcd_table_entry *entry; +	struct rb_node *p = dev->xrcd_tree.rb_node; + +	while (p) { +		entry = rb_entry(p, struct xrcd_table_entry, node); + +		if (inode < entry->inode) +			p = p->rb_left; +		else if (inode > entry->inode) +			p = p->rb_right; +		else +			return entry; +	} + +	return NULL; +} + +static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode) +{ +	struct xrcd_table_entry *entry; + +	entry = xrcd_table_search(dev, inode); +	if (!entry) +		return NULL; + +	return entry->xrcd; +} + +static void xrcd_table_delete(struct ib_uverbs_device *dev, +			      struct inode *inode) +{ +	struct xrcd_table_entry *entry; + +	entry = xrcd_table_search(dev, inode); +	if (entry) { +		iput(inode); +		rb_erase(&entry->node, &dev->xrcd_tree); +		kfree(entry); +	} +} + +ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, +			    const char __user *buf, int in_len, +			    int out_len) +{ +	struct ib_uverbs_open_xrcd	cmd; +	struct ib_uverbs_open_xrcd_resp	resp; +	struct ib_udata			udata; +	struct ib_uxrcd_object         *obj; +	struct ib_xrcd                 *xrcd = NULL; +	struct fd			f = {NULL, 0}; +	struct inode                   *inode = NULL; +	int				ret = 0; +	int				new_xrcd = 0; + +	if (out_len < sizeof resp) +		return -ENOSPC; + +	if (copy_from_user(&cmd, buf, sizeof cmd)) +		return -EFAULT; + +	INIT_UDATA(&udata, buf + sizeof cmd, +		   (unsigned long) cmd.response + sizeof 
resp, +		   in_len - sizeof cmd, out_len - sizeof  resp); + +	mutex_lock(&file->device->xrcd_tree_mutex); + +	if (cmd.fd != -1) { +		/* search for file descriptor */ +		f = fdget(cmd.fd); +		if (!f.file) { +			ret = -EBADF; +			goto err_tree_mutex_unlock; +		} + +		inode = file_inode(f.file); +		xrcd = find_xrcd(file->device, inode); +		if (!xrcd && !(cmd.oflags & O_CREAT)) { +			/* no file descriptor. Need CREATE flag */ +			ret = -EAGAIN; +			goto err_tree_mutex_unlock; +		} + +		if (xrcd && cmd.oflags & O_EXCL) { +			ret = -EINVAL; +			goto err_tree_mutex_unlock; +		} +	} + +	obj = kmalloc(sizeof *obj, GFP_KERNEL); +	if (!obj) { +		ret = -ENOMEM; +		goto err_tree_mutex_unlock; +	} + +	init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); + +	down_write(&obj->uobject.mutex); + +	if (!xrcd) { +		xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev, +							file->ucontext, &udata); +		if (IS_ERR(xrcd)) { +			ret = PTR_ERR(xrcd); +			goto err; +		} + +		xrcd->inode   = inode; +		xrcd->device  = file->device->ib_dev; +		atomic_set(&xrcd->usecnt, 0); +		mutex_init(&xrcd->tgt_qp_mutex); +		INIT_LIST_HEAD(&xrcd->tgt_qp_list); +		new_xrcd = 1; +	} + +	atomic_set(&obj->refcnt, 0); +	obj->uobject.object = xrcd; +	ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); +	if (ret) +		goto err_idr; + +	memset(&resp, 0, sizeof resp); +	resp.xrcd_handle = obj->uobject.id; + +	if (inode) { +		if (new_xrcd) { +			/* create new inode/xrcd table entry */ +			ret = xrcd_table_insert(file->device, inode, xrcd); +			if (ret) +				goto err_insert_xrcd; +		} +		atomic_inc(&xrcd->usecnt); +	} + +	if (copy_to_user((void __user *) (unsigned long) cmd.response, +			 &resp, sizeof resp)) { +		ret = -EFAULT; +		goto err_copy; +	} + +	if (f.file) +		fdput(f); + +	mutex_lock(&file->mutex); +	list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); +	mutex_unlock(&file->mutex); + +	obj->uobject.live = 1; +	up_write(&obj->uobject.mutex); + +	mutex_unlock(&file->device->xrcd_tree_mutex); +	return in_len; + +err_copy: +	if (inode) { +		if (new_xrcd) +			xrcd_table_delete(file->device, inode); +		atomic_dec(&xrcd->usecnt); +	} + +err_insert_xrcd: +	idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject); + +err_idr: +	ib_dealloc_xrcd(xrcd); + +err: +	put_uobj_write(&obj->uobject); + +err_tree_mutex_unlock: +	if (f.file) +		fdput(f); + +	mutex_unlock(&file->device->xrcd_tree_mutex); + +	return ret; +} + +ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file, +			     const char __user *buf, int in_len, +			     int out_len) +{ +	struct ib_uverbs_close_xrcd cmd; +	struct ib_uobject           *uobj; +	struct ib_xrcd              *xrcd = NULL; +	struct inode                *inode = NULL; +	struct ib_uxrcd_object      *obj; +	int                         live; +	int                         ret = 0; + +	if (copy_from_user(&cmd, buf, sizeof cmd)) +		return -EFAULT; + +	mutex_lock(&file->device->xrcd_tree_mutex); +	uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext); +	if (!uobj) { +		ret = -EINVAL; +		goto out; +	} + +	xrcd  = uobj->object; +	inode = xrcd->inode; +	obj   = container_of(uobj, struct ib_uxrcd_object, uobject); +	if (atomic_read(&obj->refcnt)) { +		put_uobj_write(uobj); +		ret = -EBUSY; +		goto out; +	} + +	if (!inode || atomic_dec_and_test(&xrcd->usecnt)) { +		ret = ib_dealloc_xrcd(uobj->object); +		if (!ret) +			uobj->live = 0; +	} + +	live = uobj->live; +	if (inode && ret) +		atomic_inc(&xrcd->usecnt); + +	put_uobj_write(uobj); + +	if (ret) +		goto out; + +	if (inode && 
!live) +		xrcd_table_delete(file->device, inode); + +	idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); +	mutex_lock(&file->mutex); +	list_del(&uobj->list); +	mutex_unlock(&file->mutex); + +	put_uobj(uobj); +	ret = in_len; + +out: +	mutex_unlock(&file->device->xrcd_tree_mutex); +	return ret; +} + +void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, +			    struct ib_xrcd *xrcd) +{ +	struct inode *inode; + +	inode = xrcd->inode; +	if (inode && !atomic_dec_and_test(&xrcd->usecnt)) +		return; + +	ib_dealloc_xrcd(xrcd); + +	if (inode) +		xrcd_table_delete(dev, inode); +} +  ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,  			 const char __user *buf, int in_len,  			 int out_len) @@ -604,19 +930,15 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,  	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))  		return -EINVAL; -	/* -	 * Local write permission is required if remote write or -	 * remote atomic permission is also requested. -	 */ -	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && -	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) -		return -EINVAL; +	ret = ib_check_mr_access(cmd.access_flags); +	if (ret) +		return ret;  	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);  	if (!uobj)  		return -ENOMEM; -	init_uobj(uobj, 0, file->ucontext, &mr_lock_key); +	init_uobj(uobj, 0, file->ucontext, &mr_lock_class);  	down_write(&uobj->mutex);  	pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -718,6 +1040,126 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,  	return in_len;  } +ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, +			 const char __user *buf, int in_len, +			 int out_len) +{ +	struct ib_uverbs_alloc_mw      cmd; +	struct ib_uverbs_alloc_mw_resp resp; +	struct ib_uobject             *uobj; +	struct ib_pd                  *pd; +	struct ib_mw                  *mw; +	int                            ret; + +	if (out_len < sizeof(resp)) +		return -ENOSPC; + +	if (copy_from_user(&cmd, buf, sizeof(cmd))) +		return -EFAULT; + +	uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); +	if (!uobj) +		return -ENOMEM; + +	init_uobj(uobj, 0, file->ucontext, &mw_lock_class); +	down_write(&uobj->mutex); + +	pd = idr_read_pd(cmd.pd_handle, file->ucontext); +	if (!pd) { +		ret = -EINVAL; +		goto err_free; +	} + +	mw = pd->device->alloc_mw(pd, cmd.mw_type); +	if (IS_ERR(mw)) { +		ret = PTR_ERR(mw); +		goto err_put; +	} + +	mw->device  = pd->device; +	mw->pd      = pd; +	mw->uobject = uobj; +	atomic_inc(&pd->usecnt); + +	uobj->object = mw; +	ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); +	if (ret) +		goto err_unalloc; + +	memset(&resp, 0, sizeof(resp)); +	resp.rkey      = mw->rkey; +	resp.mw_handle = uobj->id; + +	if (copy_to_user((void __user *)(unsigned long)cmd.response, +			 &resp, sizeof(resp))) { +		ret = -EFAULT; +		goto err_copy; +	} + +	put_pd_read(pd); + +	mutex_lock(&file->mutex); +	list_add_tail(&uobj->list, &file->ucontext->mw_list); +	mutex_unlock(&file->mutex); + +	uobj->live = 1; + +	up_write(&uobj->mutex); + +	return in_len; + +err_copy: +	idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +err_unalloc: +	ib_dealloc_mw(mw); + +err_put: +	put_pd_read(pd); + +err_free: +	put_uobj_write(uobj); +	return ret; +} + +ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, +			   const char __user *buf, int in_len, +			   int out_len) +{ +	struct ib_uverbs_dealloc_mw cmd; +	struct ib_mw               *mw; +	struct ib_uobject	   *uobj; +	int                         ret = -EINVAL; + +	if (copy_from_user(&cmd, buf, sizeof(cmd))) +		return -EFAULT; + +	uobj = 
idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); +	if (!uobj) +		return -EINVAL; + +	mw = uobj->object; + +	ret = ib_dealloc_mw(mw); +	if (!ret) +		uobj->live = 0; + +	put_uobj_write(uobj); + +	if (ret) +		return ret; + +	idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +	mutex_lock(&file->mutex); +	list_del(&uobj->list); +	mutex_unlock(&file->mutex); + +	put_uobj(uobj); + +	return in_len; +} +  ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,  				      const char __user *buf, int in_len,  				      int out_len) @@ -733,7 +1175,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; -	ret = get_unused_fd(); +	ret = get_unused_fd_flags(O_CLOEXEC);  	if (ret < 0)  		return ret;  	resp.fd = ret; @@ -784,7 +1226,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,  	if (!obj)  		return -ENOMEM; -	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); +	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class);  	down_write(&obj->uobject.mutex);  	if (cmd.comp_channel >= 0) { @@ -893,68 +1335,81 @@ out:  	return ret ? ret : in_len;  } +static int copy_wc_to_user(void __user *dest, struct ib_wc *wc) +{ +	struct ib_uverbs_wc tmp; + +	tmp.wr_id		= wc->wr_id; +	tmp.status		= wc->status; +	tmp.opcode		= wc->opcode; +	tmp.vendor_err		= wc->vendor_err; +	tmp.byte_len		= wc->byte_len; +	tmp.ex.imm_data		= (__u32 __force) wc->ex.imm_data; +	tmp.qp_num		= wc->qp->qp_num; +	tmp.src_qp		= wc->src_qp; +	tmp.wc_flags		= wc->wc_flags; +	tmp.pkey_index		= wc->pkey_index; +	tmp.slid		= wc->slid; +	tmp.sl			= wc->sl; +	tmp.dlid_path_bits	= wc->dlid_path_bits; +	tmp.port_num		= wc->port_num; +	tmp.reserved		= 0; + +	if (copy_to_user(dest, &tmp, sizeof tmp)) +		return -EFAULT; + +	return 0; +} +  ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,  			  const char __user *buf, int in_len,  			  int out_len)  {  	struct ib_uverbs_poll_cq       cmd; -	struct ib_uverbs_poll_cq_resp *resp; +	struct ib_uverbs_poll_cq_resp  resp; +	u8 __user                     *header_ptr; +	u8 __user                     *data_ptr;  	struct ib_cq                  *cq; -	struct ib_wc                  *wc; -	int                            ret = 0; -	int                            i; -	int                            rsize; +	struct ib_wc                   wc; +	int                            ret;  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; -	wc = kmalloc(cmd.ne * sizeof *wc, GFP_KERNEL); -	if (!wc) -		return -ENOMEM; +	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); +	if (!cq) +		return -EINVAL; -	rsize = sizeof *resp + cmd.ne * sizeof(struct ib_uverbs_wc); -	resp = kmalloc(rsize, GFP_KERNEL); -	if (!resp) { -		ret = -ENOMEM; -		goto out_wc; -	} +	/* we copy a struct ib_uverbs_poll_cq_resp to user space */ +	header_ptr = (void __user *)(unsigned long) cmd.response; +	data_ptr = header_ptr + sizeof resp; -	cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0); -	if (!cq) { -		ret = -EINVAL; -		goto out; -	} +	memset(&resp, 0, sizeof resp); +	while (resp.count < cmd.ne) { +		ret = ib_poll_cq(cq, 1, &wc); +		if (ret < 0) +			goto out_put; +		if (!ret) +			break; -	resp->count = ib_poll_cq(cq, cmd.ne, wc); +		ret = copy_wc_to_user(data_ptr, &wc); +		if (ret) +			goto out_put; -	put_cq_read(cq); +		data_ptr += sizeof(struct ib_uverbs_wc); +		++resp.count; +	} -	for (i = 0; i < resp->count; i++) { -		resp->wc[i].wr_id 	   = wc[i].wr_id; -		resp->wc[i].status 	   = 
wc[i].status; -		resp->wc[i].opcode 	   = wc[i].opcode; -		resp->wc[i].vendor_err 	   = wc[i].vendor_err; -		resp->wc[i].byte_len 	   = wc[i].byte_len; -		resp->wc[i].ex.imm_data    = (__u32 __force) wc[i].ex.imm_data; -		resp->wc[i].qp_num 	   = wc[i].qp->qp_num; -		resp->wc[i].src_qp 	   = wc[i].src_qp; -		resp->wc[i].wc_flags 	   = wc[i].wc_flags; -		resp->wc[i].pkey_index 	   = wc[i].pkey_index; -		resp->wc[i].slid 	   = wc[i].slid; -		resp->wc[i].sl 		   = wc[i].sl; -		resp->wc[i].dlid_path_bits = wc[i].dlid_path_bits; -		resp->wc[i].port_num 	   = wc[i].port_num; -	} - -	if (copy_to_user((void __user *) (unsigned long) cmd.response, resp, rsize)) +	if (copy_to_user(header_ptr, &resp, sizeof resp)) {  		ret = -EFAULT; +		goto out_put; +	} -out: -	kfree(resp); +	ret = in_len; -out_wc: -	kfree(wc); -	return ret ? ret : in_len; +out_put: +	put_cq_read(cq); +	return ret;  }  ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file, @@ -1039,9 +1494,12 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,  	struct ib_uverbs_create_qp_resp resp;  	struct ib_udata                 udata;  	struct ib_uqp_object           *obj; -	struct ib_pd                   *pd; -	struct ib_cq                   *scq, *rcq; -	struct ib_srq                  *srq; +	struct ib_device	       *device; +	struct ib_pd                   *pd = NULL; +	struct ib_xrcd		       *xrcd = NULL; +	struct ib_uobject	       *uninitialized_var(xrcd_uobj); +	struct ib_cq                   *scq = NULL, *rcq = NULL; +	struct ib_srq                  *srq = NULL;  	struct ib_qp                   *qp;  	struct ib_qp_init_attr          attr;  	int ret; @@ -1052,26 +1510,57 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; +	if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) +		return -EPERM; +  	INIT_UDATA(&udata, buf + sizeof cmd,  		   (unsigned long) cmd.response + sizeof resp,  		   in_len - sizeof cmd, out_len - sizeof resp); -	obj = kmalloc(sizeof *obj, GFP_KERNEL); +	obj = kzalloc(sizeof *obj, GFP_KERNEL);  	if (!obj)  		return -ENOMEM; -	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); +	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);  	down_write(&obj->uevent.uobject.mutex); -	srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL; -	pd  = idr_read_pd(cmd.pd_handle, file->ucontext); -	scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); -	rcq = cmd.recv_cq_handle == cmd.send_cq_handle ? 
-		scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); +	if (cmd.qp_type == IB_QPT_XRC_TGT) { +		xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); +		if (!xrcd) { +			ret = -EINVAL; +			goto err_put; +		} +		device = xrcd->device; +	} else { +		if (cmd.qp_type == IB_QPT_XRC_INI) { +			cmd.max_recv_wr = cmd.max_recv_sge = 0; +		} else { +			if (cmd.is_srq) { +				srq = idr_read_srq(cmd.srq_handle, file->ucontext); +				if (!srq || srq->srq_type != IB_SRQT_BASIC) { +					ret = -EINVAL; +					goto err_put; +				} +			} + +			if (cmd.recv_cq_handle != cmd.send_cq_handle) { +				rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); +				if (!rcq) { +					ret = -EINVAL; +					goto err_put; +				} +			} +		} -	if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) { -		ret = -EINVAL; -		goto err_put; +		scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); +		rcq = rcq ?: scq; +		pd  = idr_read_pd(cmd.pd_handle, file->ucontext); +		if (!pd || !scq) { +			ret = -EINVAL; +			goto err_put; +		} + +		device = pd->device;  	}  	attr.event_handler = ib_uverbs_qp_event_handler; @@ -1079,6 +1568,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,  	attr.send_cq       = scq;  	attr.recv_cq       = rcq;  	attr.srq           = srq; +	attr.xrcd	   = xrcd;  	attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;  	attr.qp_type       = cmd.qp_type;  	attr.create_flags  = 0; @@ -1093,26 +1583,35 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,  	INIT_LIST_HEAD(&obj->uevent.event_list);  	INIT_LIST_HEAD(&obj->mcast_list); -	qp = pd->device->create_qp(pd, &attr, &udata); +	if (cmd.qp_type == IB_QPT_XRC_TGT) +		qp = ib_create_qp(pd, &attr); +	else +		qp = device->create_qp(pd, &attr, &udata); +  	if (IS_ERR(qp)) {  		ret = PTR_ERR(qp);  		goto err_put;  	} -	qp->device     	  = pd->device; -	qp->pd         	  = pd; -	qp->send_cq    	  = attr.send_cq; -	qp->recv_cq    	  = attr.recv_cq; -	qp->srq	       	  = attr.srq; -	qp->uobject       = &obj->uevent.uobject; -	qp->event_handler = attr.event_handler; -	qp->qp_context    = attr.qp_context; -	qp->qp_type	  = attr.qp_type; -	atomic_inc(&pd->usecnt); -	atomic_inc(&attr.send_cq->usecnt); -	atomic_inc(&attr.recv_cq->usecnt); -	if (attr.srq) -		atomic_inc(&attr.srq->usecnt); +	if (cmd.qp_type != IB_QPT_XRC_TGT) { +		qp->real_qp	  = qp; +		qp->device	  = device; +		qp->pd		  = pd; +		qp->send_cq	  = attr.send_cq; +		qp->recv_cq	  = attr.recv_cq; +		qp->srq		  = attr.srq; +		qp->event_handler = attr.event_handler; +		qp->qp_context	  = attr.qp_context; +		qp->qp_type	  = attr.qp_type; +		atomic_set(&qp->usecnt, 0); +		atomic_inc(&pd->usecnt); +		atomic_inc(&attr.send_cq->usecnt); +		if (attr.recv_cq) +			atomic_inc(&attr.recv_cq->usecnt); +		if (attr.srq) +			atomic_inc(&attr.srq->usecnt); +	} +	qp->uobject = &obj->uevent.uobject;  	obj->uevent.uobject.object = qp;  	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); @@ -1134,9 +1633,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,  		goto err_copy;  	} -	put_pd_read(pd); -	put_cq_read(scq); -	if (rcq != scq) +	if (xrcd) { +		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, +					  uobject); +		atomic_inc(&obj->uxrcd->refcnt); +		put_xrcd_read(xrcd_uobj); +	} + +	if (pd) +		put_pd_read(pd); +	if (scq) +		put_cq_read(scq); +	if (rcq && rcq != scq)  		put_cq_read(rcq);  	if (srq)  		put_srq_read(srq); @@ -1158,6 +1666,8 @@ err_destroy:  	ib_destroy_qp(qp);  err_put: +	if (xrcd) +		put_xrcd_read(xrcd_uobj);  	if (pd)  		
put_pd_read(pd);  	if (scq) @@ -1171,6 +1681,100 @@ err_put:  	return ret;  } +ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, +			  const char __user *buf, int in_len, int out_len) +{ +	struct ib_uverbs_open_qp        cmd; +	struct ib_uverbs_create_qp_resp resp; +	struct ib_udata                 udata; +	struct ib_uqp_object           *obj; +	struct ib_xrcd		       *xrcd; +	struct ib_uobject	       *uninitialized_var(xrcd_uobj); +	struct ib_qp                   *qp; +	struct ib_qp_open_attr          attr; +	int ret; + +	if (out_len < sizeof resp) +		return -ENOSPC; + +	if (copy_from_user(&cmd, buf, sizeof cmd)) +		return -EFAULT; + +	INIT_UDATA(&udata, buf + sizeof cmd, +		   (unsigned long) cmd.response + sizeof resp, +		   in_len - sizeof cmd, out_len - sizeof resp); + +	obj = kmalloc(sizeof *obj, GFP_KERNEL); +	if (!obj) +		return -ENOMEM; + +	init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); +	down_write(&obj->uevent.uobject.mutex); + +	xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); +	if (!xrcd) { +		ret = -EINVAL; +		goto err_put; +	} + +	attr.event_handler = ib_uverbs_qp_event_handler; +	attr.qp_context    = file; +	attr.qp_num        = cmd.qpn; +	attr.qp_type       = cmd.qp_type; + +	obj->uevent.events_reported = 0; +	INIT_LIST_HEAD(&obj->uevent.event_list); +	INIT_LIST_HEAD(&obj->mcast_list); + +	qp = ib_open_qp(xrcd, &attr); +	if (IS_ERR(qp)) { +		ret = PTR_ERR(qp); +		goto err_put; +	} + +	qp->uobject = &obj->uevent.uobject; + +	obj->uevent.uobject.object = qp; +	ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); +	if (ret) +		goto err_destroy; + +	memset(&resp, 0, sizeof resp); +	resp.qpn       = qp->qp_num; +	resp.qp_handle = obj->uevent.uobject.id; + +	if (copy_to_user((void __user *) (unsigned long) cmd.response, +			 &resp, sizeof resp)) { +		ret = -EFAULT; +		goto err_remove; +	} + +	obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); +	atomic_inc(&obj->uxrcd->refcnt); +	put_xrcd_read(xrcd_uobj); + +	mutex_lock(&file->mutex); +	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list); +	mutex_unlock(&file->mutex); + +	obj->uevent.uobject.live = 1; + +	up_write(&obj->uevent.uobject.mutex); + +	return in_len; + +err_remove: +	idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); + +err_destroy: +	ib_destroy_qp(qp); + +err_put: +	put_xrcd_read(xrcd_uobj); +	put_uobj_write(&obj->uevent.uobject); +	return ret; +} +  ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,  			   const char __user *buf, int in_len,  			   int out_len) @@ -1271,6 +1875,20 @@ out:  	return ret ? ret : in_len;  } +/* Remove ignored fields set in the attribute mask */ +static int modify_qp_mask(enum ib_qp_type qp_type, int mask) +{ +	switch (qp_type) { +	case IB_QPT_XRC_INI: +		return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER); +	case IB_QPT_XRC_TGT: +		return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT | +				IB_QP_RNR_RETRY); +	default: +		return mask; +	} +} +  ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,  			    const char __user *buf, int in_len,  			    int out_len) @@ -1343,7 +1961,15 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,  	attr->alt_ah_attr.ah_flags 	    = cmd.alt_dest.is_global ? 
IB_AH_GRH : 0;  	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num; -	ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata); +	if (qp->real_qp == qp) { +		ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); +		if (ret) +			goto out; +		ret = qp->device->modify_qp(qp, attr, +			modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); +	} else { +		ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); +	}  	put_qp_read(qp); @@ -1394,6 +2020,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,  	if (ret)  		return ret; +	if (obj->uxrcd) +		atomic_dec(&obj->uxrcd->refcnt); +  	idr_remove_uobj(&ib_uverbs_qp_idr, uobj);  	mutex_lock(&file->mutex); @@ -1489,6 +2118,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,  			}  			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;  			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; +			if (next->opcode == IB_WR_SEND_WITH_IMM) +				next->ex.imm_data = +					(__be32 __force) user_wr->ex.imm_data;  		} else {  			switch (next->opcode) {  			case IB_WR_RDMA_WRITE_WITH_IMM: @@ -1540,7 +2172,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,  	}  	resp.bad_wr = 0; -	ret = qp->device->post_send(qp, wr, &bad_wr); +	ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);  	if (ret)  		for (next = wr; next; next = next->next) {  			++resp.bad_wr; @@ -1678,7 +2310,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,  		goto out;  	resp.bad_wr = 0; -	ret = qp->device->post_recv(qp, wr, &bad_wr); +	ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);  	put_qp_read(qp); @@ -1774,7 +2406,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,  	if (!uobj)  		return -ENOMEM; -	init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); +	init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class);  	down_write(&uobj->mutex);  	pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1891,7 +2523,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; -	qp = idr_read_qp(cmd.qp_handle, file->ucontext); +	qp = idr_write_qp(cmd.qp_handle, file->ucontext);  	if (!qp)  		return -EINVAL; @@ -1920,7 +2552,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,  		kfree(mcast);  out_put: -	put_qp_read(qp); +	put_qp_write(qp);  	return ret ? ret : in_len;  } @@ -1938,7 +2570,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; -	qp = idr_read_qp(cmd.qp_handle, file->ucontext); +	qp = idr_write_qp(cmd.qp_handle, file->ucontext);  	if (!qp)  		return -EINVAL; @@ -1957,100 +2589,367 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,  		}  out_put: -	put_qp_read(qp); +	put_qp_write(qp);  	return ret ? 
ret : in_len;  } -ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, -			     const char __user *buf, int in_len, -			     int out_len) +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, +				union ib_flow_spec *ib_spec) +{ +	if (kern_spec->reserved) +		return -EINVAL; + +	ib_spec->type = kern_spec->type; + +	switch (ib_spec->type) { +	case IB_FLOW_SPEC_ETH: +		ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); +		if (ib_spec->eth.size != kern_spec->eth.size) +			return -EINVAL; +		memcpy(&ib_spec->eth.val, &kern_spec->eth.val, +		       sizeof(struct ib_flow_eth_filter)); +		memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, +		       sizeof(struct ib_flow_eth_filter)); +		break; +	case IB_FLOW_SPEC_IPV4: +		ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); +		if (ib_spec->ipv4.size != kern_spec->ipv4.size) +			return -EINVAL; +		memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, +		       sizeof(struct ib_flow_ipv4_filter)); +		memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, +		       sizeof(struct ib_flow_ipv4_filter)); +		break; +	case IB_FLOW_SPEC_TCP: +	case IB_FLOW_SPEC_UDP: +		ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); +		if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) +			return -EINVAL; +		memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, +		       sizeof(struct ib_flow_tcp_udp_filter)); +		memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, +		       sizeof(struct ib_flow_tcp_udp_filter)); +		break; +	default: +		return -EINVAL; +	} +	return 0; +} + +int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, +			     struct ib_udata *ucore, +			     struct ib_udata *uhw) +{ +	struct ib_uverbs_create_flow	  cmd; +	struct ib_uverbs_create_flow_resp resp; +	struct ib_uobject		  *uobj; +	struct ib_flow			  *flow_id; +	struct ib_uverbs_flow_attr	  *kern_flow_attr; +	struct ib_flow_attr		  *flow_attr; +	struct ib_qp			  *qp; +	int err = 0; +	void *kern_spec; +	void *ib_spec; +	int i; + +	if (ucore->inlen < sizeof(cmd)) +		return -EINVAL; + +	if (ucore->outlen < sizeof(resp)) +		return -ENOSPC; + +	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); +	if (err) +		return err; + +	ucore->inbuf += sizeof(cmd); +	ucore->inlen -= sizeof(cmd); + +	if (cmd.comp_mask) +		return -EINVAL; + +	if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && +	     !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) +		return -EPERM; + +	if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) +		return -EINVAL; + +	if (cmd.flow_attr.size > ucore->inlen || +	    cmd.flow_attr.size > +	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) +		return -EINVAL; + +	if (cmd.flow_attr.reserved[0] || +	    cmd.flow_attr.reserved[1]) +		return -EINVAL; + +	if (cmd.flow_attr.num_of_specs) { +		kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, +					 GFP_KERNEL); +		if (!kern_flow_attr) +			return -ENOMEM; + +		memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); +		err = ib_copy_from_udata(kern_flow_attr + 1, ucore, +					 cmd.flow_attr.size); +		if (err) +			goto err_free_attr; +	} else { +		kern_flow_attr = &cmd.flow_attr; +	} + +	uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); +	if (!uobj) { +		err = -ENOMEM; +		goto err_free_attr; +	} +	init_uobj(uobj, 0, file->ucontext, &rule_lock_class); +	down_write(&uobj->mutex); + +	qp = idr_read_qp(cmd.qp_handle, file->ucontext); +	if (!qp) { +		err = -EINVAL; +		goto err_uobj; +	} + +	flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL); +	if 
(!flow_attr) { +		err = -ENOMEM; +		goto err_put; +	} + +	flow_attr->type = kern_flow_attr->type; +	flow_attr->priority = kern_flow_attr->priority; +	flow_attr->num_of_specs = kern_flow_attr->num_of_specs; +	flow_attr->port = kern_flow_attr->port; +	flow_attr->flags = kern_flow_attr->flags; +	flow_attr->size = sizeof(*flow_attr); + +	kern_spec = kern_flow_attr + 1; +	ib_spec = flow_attr + 1; +	for (i = 0; i < flow_attr->num_of_specs && +	     cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && +	     cmd.flow_attr.size >= +	     ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { +		err = kern_spec_to_ib_spec(kern_spec, ib_spec); +		if (err) +			goto err_free; +		flow_attr->size += +			((union ib_flow_spec *) ib_spec)->size; +		cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; +		kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size; +		ib_spec += ((union ib_flow_spec *) ib_spec)->size; +	} +	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { +		pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", +			i, cmd.flow_attr.size); +		err = -EINVAL; +		goto err_free; +	} +	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); +	if (IS_ERR(flow_id)) { +		err = PTR_ERR(flow_id); +		goto err_free; +	} +	flow_id->qp = qp; +	flow_id->uobject = uobj; +	uobj->object = flow_id; + +	err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); +	if (err) +		goto destroy_flow; + +	memset(&resp, 0, sizeof(resp)); +	resp.flow_handle = uobj->id; + +	err = ib_copy_to_udata(ucore, +			       &resp, sizeof(resp)); +	if (err) +		goto err_copy; + +	put_qp_read(qp); +	mutex_lock(&file->mutex); +	list_add_tail(&uobj->list, &file->ucontext->rule_list); +	mutex_unlock(&file->mutex); + +	uobj->live = 1; + +	up_write(&uobj->mutex); +	kfree(flow_attr); +	if (cmd.flow_attr.num_of_specs) +		kfree(kern_flow_attr); +	return 0; +err_copy: +	idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: +	ib_destroy_flow(flow_id); +err_free: +	kfree(flow_attr); +err_put: +	put_qp_read(qp); +err_uobj: +	put_uobj_write(uobj); +err_free_attr: +	if (cmd.flow_attr.num_of_specs) +		kfree(kern_flow_attr); +	return err; +} + +int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, +			      struct ib_udata *ucore, +			      struct ib_udata *uhw) +{ +	struct ib_uverbs_destroy_flow	cmd; +	struct ib_flow			*flow_id; +	struct ib_uobject		*uobj; +	int				ret; + +	if (ucore->inlen < sizeof(cmd)) +		return -EINVAL; + +	ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); +	if (ret) +		return ret; + +	if (cmd.comp_mask) +		return -EINVAL; + +	uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, +			      file->ucontext); +	if (!uobj) +		return -EINVAL; +	flow_id = uobj->object; + +	ret = ib_destroy_flow(flow_id); +	if (!ret) +		uobj->live = 0; + +	put_uobj_write(uobj); + +	idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + +	mutex_lock(&file->mutex); +	list_del(&uobj->list); +	mutex_unlock(&file->mutex); + +	put_uobj(uobj); + +	return ret; +} + +static int __uverbs_create_xsrq(struct ib_uverbs_file *file, +				struct ib_uverbs_create_xsrq *cmd, +				struct ib_udata *udata)  { -	struct ib_uverbs_create_srq      cmd;  	struct ib_uverbs_create_srq_resp resp; -	struct ib_udata                  udata; -	struct ib_uevent_object         *obj; +	struct ib_usrq_object           *obj;  	struct ib_pd                    *pd;  	struct ib_srq                   *srq; +	struct ib_uobject               *uninitialized_var(xrcd_uobj);  	struct ib_srq_init_attr          attr;  	int ret; -	if 
(out_len < sizeof resp) -		return -ENOSPC; - -	if (copy_from_user(&cmd, buf, sizeof cmd)) -		return -EFAULT; - -	INIT_UDATA(&udata, buf + sizeof cmd, -		   (unsigned long) cmd.response + sizeof resp, -		   in_len - sizeof cmd, out_len - sizeof resp); -  	obj = kmalloc(sizeof *obj, GFP_KERNEL);  	if (!obj)  		return -ENOMEM; -	init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key); -	down_write(&obj->uobject.mutex); +	init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); +	down_write(&obj->uevent.uobject.mutex); + +	if (cmd->srq_type == IB_SRQT_XRC) { +		attr.ext.xrc.xrcd  = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); +		if (!attr.ext.xrc.xrcd) { +			ret = -EINVAL; +			goto err; +		} -	pd  = idr_read_pd(cmd.pd_handle, file->ucontext); +		obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); +		atomic_inc(&obj->uxrcd->refcnt); + +		attr.ext.xrc.cq  = idr_read_cq(cmd->cq_handle, file->ucontext, 0); +		if (!attr.ext.xrc.cq) { +			ret = -EINVAL; +			goto err_put_xrcd; +		} +	} + +	pd  = idr_read_pd(cmd->pd_handle, file->ucontext);  	if (!pd) {  		ret = -EINVAL; -		goto err; +		goto err_put_cq;  	}  	attr.event_handler  = ib_uverbs_srq_event_handler;  	attr.srq_context    = file; -	attr.attr.max_wr    = cmd.max_wr; -	attr.attr.max_sge   = cmd.max_sge; -	attr.attr.srq_limit = cmd.srq_limit; +	attr.srq_type       = cmd->srq_type; +	attr.attr.max_wr    = cmd->max_wr; +	attr.attr.max_sge   = cmd->max_sge; +	attr.attr.srq_limit = cmd->srq_limit; -	obj->events_reported     = 0; -	INIT_LIST_HEAD(&obj->event_list); +	obj->uevent.events_reported = 0; +	INIT_LIST_HEAD(&obj->uevent.event_list); -	srq = pd->device->create_srq(pd, &attr, &udata); +	srq = pd->device->create_srq(pd, &attr, udata);  	if (IS_ERR(srq)) {  		ret = PTR_ERR(srq);  		goto err_put;  	} -	srq->device    	   = pd->device; -	srq->pd        	   = pd; -	srq->uobject       = &obj->uobject; +	srq->device        = pd->device; +	srq->pd            = pd; +	srq->srq_type	   = cmd->srq_type; +	srq->uobject       = &obj->uevent.uobject;  	srq->event_handler = attr.event_handler;  	srq->srq_context   = attr.srq_context; + +	if (cmd->srq_type == IB_SRQT_XRC) { +		srq->ext.xrc.cq   = attr.ext.xrc.cq; +		srq->ext.xrc.xrcd = attr.ext.xrc.xrcd; +		atomic_inc(&attr.ext.xrc.cq->usecnt); +		atomic_inc(&attr.ext.xrc.xrcd->usecnt); +	} +  	atomic_inc(&pd->usecnt);  	atomic_set(&srq->usecnt, 0); -	obj->uobject.object = srq; -	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject); +	obj->uevent.uobject.object = srq; +	ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);  	if (ret)  		goto err_destroy;  	memset(&resp, 0, sizeof resp); -	resp.srq_handle = obj->uobject.id; +	resp.srq_handle = obj->uevent.uobject.id;  	resp.max_wr     = attr.attr.max_wr;  	resp.max_sge    = attr.attr.max_sge; +	if (cmd->srq_type == IB_SRQT_XRC) +		resp.srqn = srq->ext.xrc.srq_num; -	if (copy_to_user((void __user *) (unsigned long) cmd.response, +	if (copy_to_user((void __user *) (unsigned long) cmd->response,  			 &resp, sizeof resp)) {  		ret = -EFAULT;  		goto err_copy;  	} +	if (cmd->srq_type == IB_SRQT_XRC) { +		put_uobj_read(xrcd_uobj); +		put_cq_read(attr.ext.xrc.cq); +	}  	put_pd_read(pd);  	mutex_lock(&file->mutex); -	list_add_tail(&obj->uobject.list, &file->ucontext->srq_list); +	list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list);  	mutex_unlock(&file->mutex); -	obj->uobject.live = 1; +	obj->uevent.uobject.live = 1; -	up_write(&obj->uobject.mutex); +	
up_write(&obj->uevent.uobject.mutex); -	return in_len; +	return 0;  err_copy: -	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject); +	idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject);  err_destroy:  	ib_destroy_srq(srq); @@ -2058,11 +2957,81 @@ err_destroy:  err_put:  	put_pd_read(pd); +err_put_cq: +	if (cmd->srq_type == IB_SRQT_XRC) +		put_cq_read(attr.ext.xrc.cq); + +err_put_xrcd: +	if (cmd->srq_type == IB_SRQT_XRC) { +		atomic_dec(&obj->uxrcd->refcnt); +		put_uobj_read(xrcd_uobj); +	} +  err: -	put_uobj_write(&obj->uobject); +	put_uobj_write(&obj->uevent.uobject);  	return ret;  } +ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file, +			     const char __user *buf, int in_len, +			     int out_len) +{ +	struct ib_uverbs_create_srq      cmd; +	struct ib_uverbs_create_xsrq     xcmd; +	struct ib_uverbs_create_srq_resp resp; +	struct ib_udata                  udata; +	int ret; + +	if (out_len < sizeof resp) +		return -ENOSPC; + +	if (copy_from_user(&cmd, buf, sizeof cmd)) +		return -EFAULT; + +	xcmd.response	 = cmd.response; +	xcmd.user_handle = cmd.user_handle; +	xcmd.srq_type	 = IB_SRQT_BASIC; +	xcmd.pd_handle	 = cmd.pd_handle; +	xcmd.max_wr	 = cmd.max_wr; +	xcmd.max_sge	 = cmd.max_sge; +	xcmd.srq_limit	 = cmd.srq_limit; + +	INIT_UDATA(&udata, buf + sizeof cmd, +		   (unsigned long) cmd.response + sizeof resp, +		   in_len - sizeof cmd, out_len - sizeof resp); + +	ret = __uverbs_create_xsrq(file, &xcmd, &udata); +	if (ret) +		return ret; + +	return in_len; +} + +ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file, +			      const char __user *buf, int in_len, int out_len) +{ +	struct ib_uverbs_create_xsrq     cmd; +	struct ib_uverbs_create_srq_resp resp; +	struct ib_udata                  udata; +	int ret; + +	if (out_len < sizeof resp) +		return -ENOSPC; + +	if (copy_from_user(&cmd, buf, sizeof cmd)) +		return -EFAULT; + +	INIT_UDATA(&udata, buf + sizeof cmd, +		   (unsigned long) cmd.response + sizeof resp, +		   in_len - sizeof cmd, out_len - sizeof resp); + +	ret = __uverbs_create_xsrq(file, &cmd, &udata); +	if (ret) +		return ret; + +	return in_len; +} +  ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,  			     const char __user *buf, int in_len,  			     int out_len) @@ -2143,6 +3112,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,  	struct ib_srq               	 *srq;  	struct ib_uevent_object        	 *obj;  	int                         	  ret = -EINVAL; +	struct ib_usrq_object		 *us; +	enum ib_srq_type		  srq_type;  	if (copy_from_user(&cmd, buf, sizeof cmd))  		return -EFAULT; @@ -2152,6 +3123,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,  		return -EINVAL;  	srq = uobj->object;  	obj = container_of(uobj, struct ib_uevent_object, uobject); +	srq_type = srq->srq_type;  	ret = ib_destroy_srq(srq);  	if (!ret) @@ -2162,6 +3134,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,  	if (ret)  		return ret; +	if (srq_type == IB_SRQT_XRC) { +		us = container_of(obj, struct ib_usrq_object, uevent); +		atomic_dec(&us->uxrcd->refcnt); +	} +  	idr_remove_uobj(&ib_uverbs_srq_idr, uobj);  	mutex_lock(&file->mutex); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index ec83e9fe387..08219fb3338 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,6 +72,8 @@ DEFINE_IDR(ib_uverbs_ah_idr);  DEFINE_IDR(ib_uverbs_cq_idr);  DEFINE_IDR(ib_uverbs_qp_idr);  DEFINE_IDR(ib_uverbs_srq_idr); +DEFINE_IDR(ib_uverbs_xrcd_idr); 
+DEFINE_IDR(ib_uverbs_rule_idr);  static DEFINE_SPINLOCK(map_lock);  static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -86,6 +88,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,  	[IB_USER_VERBS_CMD_DEALLOC_PD]		= ib_uverbs_dealloc_pd,  	[IB_USER_VERBS_CMD_REG_MR]		= ib_uverbs_reg_mr,  	[IB_USER_VERBS_CMD_DEREG_MR]		= ib_uverbs_dereg_mr, +	[IB_USER_VERBS_CMD_ALLOC_MW]		= ib_uverbs_alloc_mw, +	[IB_USER_VERBS_CMD_DEALLOC_MW]		= ib_uverbs_dealloc_mw,  	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,  	[IB_USER_VERBS_CMD_CREATE_CQ]		= ib_uverbs_create_cq,  	[IB_USER_VERBS_CMD_RESIZE_CQ]		= ib_uverbs_resize_cq, @@ -107,6 +111,17 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,  	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= ib_uverbs_modify_srq,  	[IB_USER_VERBS_CMD_QUERY_SRQ]		= ib_uverbs_query_srq,  	[IB_USER_VERBS_CMD_DESTROY_SRQ]		= ib_uverbs_destroy_srq, +	[IB_USER_VERBS_CMD_OPEN_XRCD]		= ib_uverbs_open_xrcd, +	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd, +	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq, +	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp, +}; + +static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, +				    struct ib_udata *ucore, +				    struct ib_udata *uhw) = { +	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow, +	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow  };  static void ib_uverbs_add_one(struct ib_device *device); @@ -196,14 +211,35 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,  		kfree(uobj);  	} +	/* Remove MWs before QPs, in order to support type 2A MWs. */ +	list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { +		struct ib_mw *mw = uobj->object; + +		idr_remove_uobj(&ib_uverbs_mw_idr, uobj); +		ib_dealloc_mw(mw); +		kfree(uobj); +	} + +	list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { +		struct ib_flow *flow_id = uobj->object; + +		idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +		ib_destroy_flow(flow_id); +		kfree(uobj); +	} +  	list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) {  		struct ib_qp *qp = uobj->object;  		struct ib_uqp_object *uqp =  			container_of(uobj, struct ib_uqp_object, uevent.uobject);  		idr_remove_uobj(&ib_uverbs_qp_idr, uobj); -		ib_uverbs_detach_umcast(qp, uqp); -		ib_destroy_qp(qp); +		if (qp != qp->real_qp) { +			ib_close_qp(qp); +		} else { +			ib_uverbs_detach_umcast(qp, uqp); +			ib_destroy_qp(qp); +		}  		ib_uverbs_release_uevent(file, &uqp->uevent);  		kfree(uqp);  	} @@ -231,8 +267,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,  		kfree(uevent);  	} -	/* XXX Free MWs */ -  	list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {  		struct ib_mr *mr = uobj->object; @@ -241,6 +275,18 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,  		kfree(uobj);  	} +	mutex_lock(&file->device->xrcd_tree_mutex); +	list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) { +		struct ib_xrcd *xrcd = uobj->object; +		struct ib_uxrcd_object *uxrcd = +			container_of(uobj, struct ib_uxrcd_object, uobject); + +		idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj); +		ib_uverbs_dealloc_xrcd(file->device, xrcd); +		kfree(uxrcd); +	} +	mutex_unlock(&file->device->xrcd_tree_mutex); +  	list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {  		struct ib_pd *pd = uobj->object; @@ -520,16 +566,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,  struct ib_uverbs_event_file 
*ib_uverbs_lookup_comp_file(int fd)  {  	struct ib_uverbs_event_file *ev_file = NULL; -	struct file *filp; +	struct fd f = fdget(fd); -	filp = fget(fd); -	if (!filp) +	if (!f.file)  		return NULL; -	if (filp->f_op != &uverbs_event_fops) +	if (f.file->f_op != &uverbs_event_fops)  		goto out; -	ev_file = filp->private_data; +	ev_file = f.file->private_data;  	if (ev_file->is_async) {  		ev_file = NULL;  		goto out; @@ -538,7 +583,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)  	kref_get(&ev_file->ref);  out: -	fput(filp); +	fdput(f);  	return ev_file;  } @@ -547,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,  {  	struct ib_uverbs_file *file = filp->private_data;  	struct ib_uverbs_cmd_hdr hdr; +	__u32 flags;  	if (count < sizeof hdr)  		return -EINVAL; @@ -554,23 +600,110 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,  	if (copy_from_user(&hdr, buf, sizeof hdr))  		return -EFAULT; -	if (hdr.in_words * 4 != count) -		return -EINVAL; +	flags = (hdr.command & +		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; -	if (hdr.command < 0				|| -	    hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || -	    !uverbs_cmd_table[hdr.command]) -		return -EINVAL; +	if (!flags) { +		__u32 command; -	if (!file->ucontext && -	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) -		return -EINVAL; +		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | +					   IB_USER_VERBS_CMD_COMMAND_MASK)) +			return -EINVAL; + +		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + +		if (command >= ARRAY_SIZE(uverbs_cmd_table) || +		    !uverbs_cmd_table[command]) +			return -EINVAL; + +		if (!file->ucontext && +		    command != IB_USER_VERBS_CMD_GET_CONTEXT) +			return -EINVAL; + +		if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command))) +			return -ENOSYS; + +		if (hdr.in_words * 4 != count) +			return -EINVAL; + +		return uverbs_cmd_table[command](file, +						 buf + sizeof(hdr), +						 hdr.in_words * 4, +						 hdr.out_words * 4); -	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) -		return -ENOSYS; +	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { +		__u32 command; -	return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, -					     hdr.in_words * 4, hdr.out_words * 4); +		struct ib_uverbs_ex_cmd_hdr ex_hdr; +		struct ib_udata ucore; +		struct ib_udata uhw; +		int err; +		size_t written_count = count; + +		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | +					   IB_USER_VERBS_CMD_COMMAND_MASK)) +			return -EINVAL; + +		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + +		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || +		    !uverbs_ex_cmd_table[command]) +			return -ENOSYS; + +		if (!file->ucontext) +			return -EINVAL; + +		if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command))) +			return -ENOSYS; + +		if (count < (sizeof(hdr) + sizeof(ex_hdr))) +			return -EINVAL; + +		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) +			return -EFAULT; + +		count -= sizeof(hdr) + sizeof(ex_hdr); +		buf += sizeof(hdr) + sizeof(ex_hdr); + +		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) +			return -EINVAL; + +		if (ex_hdr.cmd_hdr_reserved) +			return -EINVAL; + +		if (ex_hdr.response) { +			if (!hdr.out_words && !ex_hdr.provider_out_words) +				return -EINVAL; + +			if (!access_ok(VERIFY_WRITE, +				       (void __user *) (unsigned long) ex_hdr.response, +				       (hdr.out_words + ex_hdr.provider_out_words) * 8)) +				
return -EFAULT; +		} else { +			if (hdr.out_words || ex_hdr.provider_out_words) +				return -EINVAL; +		} + +		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, +				       hdr.in_words * 8, hdr.out_words * 8); + +		INIT_UDATA_BUF_OR_NULL(&uhw, +				       buf + ucore.inlen, +				       (unsigned long) ex_hdr.response + ucore.outlen, +				       ex_hdr.provider_in_words * 8, +				       ex_hdr.provider_out_words * 8); + +		err = uverbs_ex_cmd_table[command](file, +						   &ucore, +						   &uhw); + +		if (err) +			return err; + +		return written_count; +	} + +	return -ENOSYS;  }  static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) @@ -741,6 +874,8 @@ static void ib_uverbs_add_one(struct ib_device *device)  	kref_init(&uverbs_dev->ref);  	init_completion(&uverbs_dev->comp); +	uverbs_dev->xrcd_tree = RB_ROOT; +	mutex_init(&uverbs_dev->xrcd_tree_mutex);  	spin_lock(&map_lock);  	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); @@ -824,6 +959,13 @@ static void ib_uverbs_remove_one(struct ib_device *device)  	kfree(uverbs_dev);  } +static char *uverbs_devnode(struct device *dev, umode_t *mode) +{ +	if (mode) +		*mode = 0666; +	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} +  static int __init ib_uverbs_init(void)  {  	int ret; @@ -842,6 +984,8 @@ static int __init ib_uverbs_init(void)  		goto out_chrdev;  	} +	uverbs_class->devnode = uverbs_devnode; +  	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);  	if (ret) {  		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index 5440da0e59b..e7bee46868d 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -30,6 +30,7 @@   * SOFTWARE.   */ +#include <linux/export.h>  #include <rdma/ib_marshall.h>  void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, @@ -40,18 +41,21 @@ void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,  	dst->grh.sgid_index        = src->grh.sgid_index;  	dst->grh.hop_limit         = src->grh.hop_limit;  	dst->grh.traffic_class     = src->grh.traffic_class; +	memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved));  	dst->dlid 	    	   = src->dlid;  	dst->sl   	    	   = src->sl;  	dst->src_path_bits 	   = src->src_path_bits;  	dst->static_rate   	   = src->static_rate;  	dst->is_global             = src->ah_flags & IB_AH_GRH ? 
1 : 0;  	dst->port_num 	    	   = src->port_num; +	dst->reserved 		   = 0;  }  EXPORT_SYMBOL(ib_copy_ah_attr_to_user);  void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,  			     struct ib_qp_attr *src)  { +	dst->qp_state	        = src->qp_state;  	dst->cur_qp_state	= src->cur_qp_state;  	dst->path_mtu		= src->path_mtu;  	dst->path_mig_state	= src->path_mig_state; @@ -83,6 +87,7 @@ void ib_copy_qp_attr_to_user(struct ib_uverbs_qp_attr *dst,  	dst->rnr_retry		= src->rnr_retry;  	dst->alt_port_num	= src->alt_port_num;  	dst->alt_timeout	= src->alt_timeout; +	memset(dst->reserved, 0, sizeof(dst->reserved));  }  EXPORT_SYMBOL(ib_copy_qp_attr_to_user); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index af7a8b08b2e..c2b89cc5dbc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -38,12 +38,17 @@  #include <linux/errno.h>  #include <linux/err.h> +#include <linux/export.h>  #include <linux/string.h> +#include <linux/slab.h>  #include <rdma/ib_verbs.h>  #include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> -int ib_rate_to_mult(enum ib_rate rate) +#include "core_priv.h" + +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)  {  	switch (rate) {  	case IB_RATE_2_5_GBPS: return  1; @@ -60,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)  }  EXPORT_SYMBOL(ib_rate_to_mult); -enum ib_rate mult_to_ib_rate(int mult) +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)  {  	switch (mult) {  	case 1:  return IB_RATE_2_5_GBPS; @@ -77,7 +82,32 @@ enum ib_rate mult_to_ib_rate(int mult)  }  EXPORT_SYMBOL(mult_to_ib_rate); -enum rdma_transport_type +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) +{ +	switch (rate) { +	case IB_RATE_2_5_GBPS: return 2500; +	case IB_RATE_5_GBPS:   return 5000; +	case IB_RATE_10_GBPS:  return 10000; +	case IB_RATE_20_GBPS:  return 20000; +	case IB_RATE_30_GBPS:  return 30000; +	case IB_RATE_40_GBPS:  return 40000; +	case IB_RATE_60_GBPS:  return 60000; +	case IB_RATE_80_GBPS:  return 80000; +	case IB_RATE_120_GBPS: return 120000; +	case IB_RATE_14_GBPS:  return 14062; +	case IB_RATE_56_GBPS:  return 56250; +	case IB_RATE_112_GBPS: return 112500; +	case IB_RATE_168_GBPS: return 168750; +	case IB_RATE_25_GBPS:  return 25781; +	case IB_RATE_100_GBPS: return 103125; +	case IB_RATE_200_GBPS: return 206250; +	case IB_RATE_300_GBPS: return 309375; +	default:	       return -1; +	} +} +EXPORT_SYMBOL(ib_rate_to_mbps); + +__attribute_const__ enum rdma_transport_type  rdma_node_get_transport(enum rdma_node_type node_type)  {  	switch (node_type) { @@ -87,6 +117,10 @@ rdma_node_get_transport(enum rdma_node_type node_type)  		return RDMA_TRANSPORT_IB;  	case RDMA_NODE_RNIC:  		return RDMA_TRANSPORT_IWARP; +	case RDMA_NODE_USNIC: +		return RDMA_TRANSPORT_USNIC; +	case RDMA_NODE_USNIC_UDP: +		return RDMA_TRANSPORT_USNIC_UDP;  	default:  		BUG();  		return 0; @@ -103,6 +137,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_  	case RDMA_TRANSPORT_IB:  		return IB_LINK_LAYER_INFINIBAND;  	case RDMA_TRANSPORT_IWARP: +	case RDMA_TRANSPORT_USNIC: +	case RDMA_TRANSPORT_USNIC_UDP:  		return IB_LINK_LAYER_ETHERNET;  	default:  		return IB_LINK_LAYER_UNSPECIFIED; @@ -162,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,  	u32 flow_class;  	u16 gid_index;  	int ret; +	int is_eth = (rdma_port_get_link_layer(device, port_num) == +			IB_LINK_LAYER_ETHERNET);  	memset(ah_attr, 0, sizeof *ah_attr); +	if (is_eth) { +		if (!(wc->wc_flags 
& IB_WC_GRH)) +			return -EPROTOTYPE; + +		if (wc->wc_flags & IB_WC_WITH_SMAC && +		    wc->wc_flags & IB_WC_WITH_VLAN) { +			memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); +			ah_attr->vlan_id = wc->vlan_id; +		} else { +			ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, +					ah_attr->dmac, &ah_attr->vlan_id); +			if (ret) +				return ret; +		} +	} else { +		ah_attr->vlan_id = 0xffff; +	} +  	ah_attr->dlid = wc->slid;  	ah_attr->sl = wc->sl;  	ah_attr->src_path_bits = wc->dlid_path_bits; @@ -250,6 +306,13 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,  		srq->uobject       = NULL;  		srq->event_handler = srq_init_attr->event_handler;  		srq->srq_context   = srq_init_attr->srq_context; +		srq->srq_type      = srq_init_attr->srq_type; +		if (srq->srq_type == IB_SRQT_XRC) { +			srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd; +			srq->ext.xrc.cq   = srq_init_attr->ext.xrc.cq; +			atomic_inc(&srq->ext.xrc.xrcd->usecnt); +			atomic_inc(&srq->ext.xrc.cq->usecnt); +		}  		atomic_inc(&pd->usecnt);  		atomic_set(&srq->usecnt, 0);  	} @@ -279,16 +342,29 @@ EXPORT_SYMBOL(ib_query_srq);  int ib_destroy_srq(struct ib_srq *srq)  {  	struct ib_pd *pd; +	enum ib_srq_type srq_type; +	struct ib_xrcd *uninitialized_var(xrcd); +	struct ib_cq *uninitialized_var(cq);  	int ret;  	if (atomic_read(&srq->usecnt))  		return -EBUSY;  	pd = srq->pd; +	srq_type = srq->srq_type; +	if (srq_type == IB_SRQT_XRC) { +		xrcd = srq->ext.xrc.xrcd; +		cq = srq->ext.xrc.cq; +	}  	ret = srq->device->destroy_srq(srq); -	if (!ret) +	if (!ret) {  		atomic_dec(&pd->usecnt); +		if (srq_type == IB_SRQT_XRC) { +			atomic_dec(&xrcd->usecnt); +			atomic_dec(&cq->usecnt); +		} +	}  	return ret;  } @@ -296,28 +372,127 @@ EXPORT_SYMBOL(ib_destroy_srq);  /* Queue pairs */ +static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) +{ +	struct ib_qp *qp = context; +	unsigned long flags; + +	spin_lock_irqsave(&qp->device->event_handler_lock, flags); +	list_for_each_entry(event->element.qp, &qp->open_list, open_list) +		if (event->element.qp->event_handler) +			event->element.qp->event_handler(event, event->element.qp->qp_context); +	spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); +} + +static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) +{ +	mutex_lock(&xrcd->tgt_qp_mutex); +	list_add(&qp->xrcd_list, &xrcd->tgt_qp_list); +	mutex_unlock(&xrcd->tgt_qp_mutex); +} + +static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp, +				  void (*event_handler)(struct ib_event *, void *), +				  void *qp_context) +{ +	struct ib_qp *qp; +	unsigned long flags; + +	qp = kzalloc(sizeof *qp, GFP_KERNEL); +	if (!qp) +		return ERR_PTR(-ENOMEM); + +	qp->real_qp = real_qp; +	atomic_inc(&real_qp->usecnt); +	qp->device = real_qp->device; +	qp->event_handler = event_handler; +	qp->qp_context = qp_context; +	qp->qp_num = real_qp->qp_num; +	qp->qp_type = real_qp->qp_type; + +	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); +	list_add(&qp->open_list, &real_qp->open_list); +	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + +	return qp; +} + +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, +			 struct ib_qp_open_attr *qp_open_attr) +{ +	struct ib_qp *qp, *real_qp; + +	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT) +		return ERR_PTR(-EINVAL); + +	qp = ERR_PTR(-EINVAL); +	mutex_lock(&xrcd->tgt_qp_mutex); +	list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) { +		if (real_qp->qp_num == qp_open_attr->qp_num) { +			qp = __ib_open_qp(real_qp, qp_open_attr->event_handler, +	
				  qp_open_attr->qp_context); +			break; +		} +	} +	mutex_unlock(&xrcd->tgt_qp_mutex); +	return qp; +} +EXPORT_SYMBOL(ib_open_qp); +  struct ib_qp *ib_create_qp(struct ib_pd *pd,  			   struct ib_qp_init_attr *qp_init_attr)  { -	struct ib_qp *qp; +	struct ib_qp *qp, *real_qp; +	struct ib_device *device; -	qp = pd->device->create_qp(pd, qp_init_attr, NULL); +	device = pd ? pd->device : qp_init_attr->xrcd->device; +	qp = device->create_qp(pd, qp_init_attr, NULL);  	if (!IS_ERR(qp)) { -		qp->device     	  = pd->device; -		qp->pd         	  = pd; -		qp->send_cq    	  = qp_init_attr->send_cq; -		qp->recv_cq    	  = qp_init_attr->recv_cq; -		qp->srq	       	  = qp_init_attr->srq; -		qp->uobject       = NULL; -		qp->event_handler = qp_init_attr->event_handler; -		qp->qp_context    = qp_init_attr->qp_context; -		qp->qp_type	  = qp_init_attr->qp_type; -		atomic_inc(&pd->usecnt); -		atomic_inc(&qp_init_attr->send_cq->usecnt); -		atomic_inc(&qp_init_attr->recv_cq->usecnt); -		if (qp_init_attr->srq) -			atomic_inc(&qp_init_attr->srq->usecnt); +		qp->device     = device; +		qp->real_qp    = qp; +		qp->uobject    = NULL; +		qp->qp_type    = qp_init_attr->qp_type; + +		atomic_set(&qp->usecnt, 0); +		if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { +			qp->event_handler = __ib_shared_qp_event_handler; +			qp->qp_context = qp; +			qp->pd = NULL; +			qp->send_cq = qp->recv_cq = NULL; +			qp->srq = NULL; +			qp->xrcd = qp_init_attr->xrcd; +			atomic_inc(&qp_init_attr->xrcd->usecnt); +			INIT_LIST_HEAD(&qp->open_list); + +			real_qp = qp; +			qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, +					  qp_init_attr->qp_context); +			if (!IS_ERR(qp)) +				__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp); +			else +				real_qp->device->destroy_qp(real_qp); +		} else { +			qp->event_handler = qp_init_attr->event_handler; +			qp->qp_context = qp_init_attr->qp_context; +			if (qp_init_attr->qp_type == IB_QPT_XRC_INI) { +				qp->recv_cq = NULL; +				qp->srq = NULL; +			} else { +				qp->recv_cq = qp_init_attr->recv_cq; +				atomic_inc(&qp_init_attr->recv_cq->usecnt); +				qp->srq = qp_init_attr->srq; +				if (qp->srq) +					atomic_inc(&qp_init_attr->srq->usecnt); +			} + +			qp->pd	    = pd; +			qp->send_cq = qp_init_attr->send_cq; +			qp->xrcd    = NULL; + +			atomic_inc(&pd->usecnt); +			atomic_inc(&qp_init_attr->send_cq->usecnt); +		}  	}  	return qp; @@ -326,8 +501,10 @@ EXPORT_SYMBOL(ib_create_qp);  static const struct {  	int			valid; -	enum ib_qp_attr_mask	req_param[IB_QPT_RAW_ETHERTYPE + 1]; -	enum ib_qp_attr_mask	opt_param[IB_QPT_RAW_ETHERTYPE + 1]; +	enum ib_qp_attr_mask	req_param[IB_QPT_MAX]; +	enum ib_qp_attr_mask	req_param_add_eth[IB_QPT_MAX]; +	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX]; +	enum ib_qp_attr_mask	opt_param_add_eth[IB_QPT_MAX];  } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {  	[IB_QPS_RESET] = {  		[IB_QPS_RESET] = { .valid = 1 }, @@ -337,12 +514,19 @@ static const struct {  				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|  						IB_QP_PORT			|  						IB_QP_QKEY), +				[IB_QPT_RAW_PACKET] = IB_QP_PORT,  				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|  						IB_QP_PORT			|  						IB_QP_ACCESS_FLAGS),  				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|  						IB_QP_PORT			|  						IB_QP_ACCESS_FLAGS), +				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		| +						IB_QP_PORT			| +						IB_QP_ACCESS_FLAGS), +				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		| +						IB_QP_PORT			| +						IB_QP_ACCESS_FLAGS),  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|  						IB_QP_QKEY),  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		| @@ -365,6 +549,12 @@ 
static const struct {  				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|  						IB_QP_PORT			|  						IB_QP_ACCESS_FLAGS), +				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		| +						IB_QP_PORT			| +						IB_QP_ACCESS_FLAGS), +				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		| +						IB_QP_PORT			| +						IB_QP_ACCESS_FLAGS),  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|  						IB_QP_QKEY),  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		| @@ -384,6 +574,22 @@ static const struct {  						IB_QP_RQ_PSN			|  						IB_QP_MAX_DEST_RD_ATOMIC	|  						IB_QP_MIN_RNR_TIMER), +				[IB_QPT_XRC_INI] = (IB_QP_AV			| +						IB_QP_PATH_MTU			| +						IB_QP_DEST_QPN			| +						IB_QP_RQ_PSN), +				[IB_QPT_XRC_TGT] = (IB_QP_AV			| +						IB_QP_PATH_MTU			| +						IB_QP_DEST_QPN			| +						IB_QP_RQ_PSN			| +						IB_QP_MAX_DEST_RD_ATOMIC	| +						IB_QP_MIN_RNR_TIMER), +			}, +			.req_param_add_eth = { +				[IB_QPT_RC]  = (IB_QP_SMAC), +				[IB_QPT_UC]  = (IB_QP_SMAC), +				[IB_QPT_XRC_INI]  = (IB_QP_SMAC), +				[IB_QPT_XRC_TGT]  = (IB_QP_SMAC)  			},  			.opt_param = {  				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		| @@ -394,11 +600,31 @@ static const struct {  				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|  						 IB_QP_ACCESS_FLAGS		|  						 IB_QP_PKEY_INDEX), +				 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH		| +						 IB_QP_ACCESS_FLAGS		| +						 IB_QP_PKEY_INDEX), +				 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH		| +						 IB_QP_ACCESS_FLAGS		| +						 IB_QP_PKEY_INDEX),  				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|  						 IB_QP_QKEY),  				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|  						 IB_QP_QKEY), -			 } +			 }, +			.opt_param_add_eth = { +				[IB_QPT_RC]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_UC]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_XRC_INI]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_XRC_TGT]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID) +			}  		}  	},  	[IB_QPS_RTR]   = { @@ -414,6 +640,13 @@ static const struct {  						IB_QP_RNR_RETRY			|  						IB_QP_SQ_PSN			|  						IB_QP_MAX_QP_RD_ATOMIC), +				[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT		| +						IB_QP_RETRY_CNT			| +						IB_QP_RNR_RETRY			| +						IB_QP_SQ_PSN			| +						IB_QP_MAX_QP_RD_ATOMIC), +				[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT		| +						IB_QP_SQ_PSN),  				[IB_QPT_SMI] = IB_QP_SQ_PSN,  				[IB_QPT_GSI] = IB_QP_SQ_PSN,  			}, @@ -429,6 +662,15 @@ static const struct {  						 IB_QP_ACCESS_FLAGS		|  						 IB_QP_MIN_RNR_TIMER		|  						 IB_QP_PATH_MIG_STATE), +				 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		| +						 IB_QP_ALT_PATH			| +						 IB_QP_ACCESS_FLAGS		| +						 IB_QP_PATH_MIG_STATE), +				 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		| +						 IB_QP_ALT_PATH			| +						 IB_QP_ACCESS_FLAGS		| +						 IB_QP_MIN_RNR_TIMER		| +						 IB_QP_PATH_MIG_STATE),  				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|  						 IB_QP_QKEY),  				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		| @@ -453,6 +695,15 @@ static const struct {  						IB_QP_ALT_PATH			|  						IB_QP_PATH_MIG_STATE		|  						IB_QP_MIN_RNR_TIMER), +				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_ALT_PATH			| +						IB_QP_PATH_MIG_STATE), +				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_ALT_PATH			| +						IB_QP_PATH_MIG_STATE		| +						IB_QP_MIN_RNR_TIMER),  				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|  						IB_QP_QKEY),  				[IB_QPT_GSI] = (IB_QP_CUR_STATE			| @@ -465,6 +716,8 @@ static const struct {  				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,  				[IB_QPT_UC]  = 
IB_QP_EN_SQD_ASYNC_NOTIFY,  				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY, +				[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY, +				[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */  				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,  				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY  			} @@ -487,6 +740,15 @@ static const struct {  						IB_QP_ACCESS_FLAGS		|  						IB_QP_MIN_RNR_TIMER		|  						IB_QP_PATH_MIG_STATE), +				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		| +						IB_QP_ALT_PATH			| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_PATH_MIG_STATE), +				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		| +						IB_QP_ALT_PATH			| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_MIN_RNR_TIMER		| +						IB_QP_PATH_MIG_STATE),  				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|  						IB_QP_QKEY),  				[IB_QPT_GSI] = (IB_QP_CUR_STATE			| @@ -515,6 +777,25 @@ static const struct {  						IB_QP_PKEY_INDEX		|  						IB_QP_MIN_RNR_TIMER		|  						IB_QP_PATH_MIG_STATE), +				[IB_QPT_XRC_INI] = (IB_QP_PORT			| +						IB_QP_AV			| +						IB_QP_TIMEOUT			| +						IB_QP_RETRY_CNT			| +						IB_QP_RNR_RETRY			| +						IB_QP_MAX_QP_RD_ATOMIC		| +						IB_QP_ALT_PATH			| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_PKEY_INDEX		| +						IB_QP_PATH_MIG_STATE), +				[IB_QPT_XRC_TGT] = (IB_QP_PORT			| +						IB_QP_AV			| +						IB_QP_TIMEOUT			| +						IB_QP_MAX_DEST_RD_ATOMIC	| +						IB_QP_ALT_PATH			| +						IB_QP_ACCESS_FLAGS		| +						IB_QP_PKEY_INDEX		| +						IB_QP_MIN_RNR_TIMER		| +						IB_QP_PATH_MIG_STATE),  				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|  						IB_QP_QKEY),  				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		| @@ -546,7 +827,8 @@ static const struct {  };  int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, -		       enum ib_qp_type type, enum ib_qp_attr_mask mask) +		       enum ib_qp_type type, enum ib_qp_attr_mask mask, +		       enum rdma_link_layer ll)  {  	enum ib_qp_attr_mask req_param, opt_param; @@ -565,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,  	req_param = qp_state_table[cur_state][next_state].req_param[type];  	opt_param = qp_state_table[cur_state][next_state].opt_param[type]; +	if (ll == IB_LINK_LAYER_ETHERNET) { +		req_param |= qp_state_table[cur_state][next_state]. +			req_param_add_eth[type]; +		opt_param |= qp_state_table[cur_state][next_state]. 
+			opt_param_add_eth[type]; +	} +  	if ((mask & req_param) != req_param)  		return 0; @@ -575,11 +864,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,  }  EXPORT_SYMBOL(ib_modify_qp_is_ok); +int ib_resolve_eth_l2_attrs(struct ib_qp *qp, +			    struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ +	int           ret = 0; +	union ib_gid  sgid; + +	if ((*qp_attr_mask & IB_QP_AV)  && +	    (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { +		ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, +				   qp_attr->ah_attr.grh.sgid_index, &sgid); +		if (ret) +			goto out; +		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { +			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); +			rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); +			qp_attr->vlan_id = rdma_get_vlan_id(&sgid); +		} else { +			ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, +					qp_attr->ah_attr.dmac, &qp_attr->vlan_id); +			if (ret) +				goto out; +			ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); +			if (ret) +				goto out; +		} +		*qp_attr_mask |= IB_QP_SMAC; +		if (qp_attr->vlan_id < 0xFFFF) +			*qp_attr_mask |= IB_QP_VID; +	} +out: +	return ret; +} +EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); + +  int ib_modify_qp(struct ib_qp *qp,  		 struct ib_qp_attr *qp_attr,  		 int qp_attr_mask)  { -	return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL); +	int ret; + +	ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); +	if (ret) +		return ret; + +	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);  }  EXPORT_SYMBOL(ib_modify_qp); @@ -589,11 +919,59 @@ int ib_query_qp(struct ib_qp *qp,  		struct ib_qp_init_attr *qp_init_attr)  {  	return qp->device->query_qp ? 
-		qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) : +		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :  		-ENOSYS;  }  EXPORT_SYMBOL(ib_query_qp); +int ib_close_qp(struct ib_qp *qp) +{ +	struct ib_qp *real_qp; +	unsigned long flags; + +	real_qp = qp->real_qp; +	if (real_qp == qp) +		return -EINVAL; + +	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags); +	list_del(&qp->open_list); +	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags); + +	atomic_dec(&real_qp->usecnt); +	kfree(qp); + +	return 0; +} +EXPORT_SYMBOL(ib_close_qp); + +static int __ib_destroy_shared_qp(struct ib_qp *qp) +{ +	struct ib_xrcd *xrcd; +	struct ib_qp *real_qp; +	int ret; + +	real_qp = qp->real_qp; +	xrcd = real_qp->xrcd; + +	mutex_lock(&xrcd->tgt_qp_mutex); +	ib_close_qp(qp); +	if (atomic_read(&real_qp->usecnt) == 0) +		list_del(&real_qp->xrcd_list); +	else +		real_qp = NULL; +	mutex_unlock(&xrcd->tgt_qp_mutex); + +	if (real_qp) { +		ret = ib_destroy_qp(real_qp); +		if (!ret) +			atomic_dec(&xrcd->usecnt); +		else +			__ib_insert_xrcd_qp(xrcd, real_qp); +	} + +	return 0; +} +  int ib_destroy_qp(struct ib_qp *qp)  {  	struct ib_pd *pd; @@ -601,16 +979,25 @@ int ib_destroy_qp(struct ib_qp *qp)  	struct ib_srq *srq;  	int ret; -	pd  = qp->pd; -	scq = qp->send_cq; -	rcq = qp->recv_cq; -	srq = qp->srq; +	if (atomic_read(&qp->usecnt)) +		return -EBUSY; + +	if (qp->real_qp != qp) +		return __ib_destroy_shared_qp(qp); + +	pd   = qp->pd; +	scq  = qp->send_cq; +	rcq  = qp->recv_cq; +	srq  = qp->srq;  	ret = qp->device->destroy_qp(qp);  	if (!ret) { -		atomic_dec(&pd->usecnt); -		atomic_dec(&scq->usecnt); -		atomic_dec(&rcq->usecnt); +		if (pd) +			atomic_dec(&pd->usecnt); +		if (scq) +			atomic_dec(&scq->usecnt); +		if (rcq) +			atomic_dec(&rcq->usecnt);  		if (srq)  			atomic_dec(&srq->usecnt);  	} @@ -671,6 +1058,11 @@ EXPORT_SYMBOL(ib_resize_cq);  struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)  {  	struct ib_mr *mr; +	int err; + +	err = ib_check_mr_access(mr_access_flags); +	if (err) +		return ERR_PTR(err);  	mr = pd->device->get_dma_mr(pd, mr_access_flags); @@ -693,6 +1085,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,  			     u64 *iova_start)  {  	struct ib_mr *mr; +	int err; + +	err = ib_check_mr_access(mr_access_flags); +	if (err) +		return ERR_PTR(err);  	if (!pd->device->reg_phys_mr)  		return ERR_PTR(-ENOSYS); @@ -723,6 +1120,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,  	struct ib_pd *old_pd;  	int ret; +	ret = ib_check_mr_access(mr_access_flags); +	if (ret) +		return ret; +  	if (!mr->device->rereg_phys_mr)  		return -ENOSYS; @@ -768,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)  }  EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, +			   struct ib_mr_init_attr *mr_init_attr) +{ +	struct ib_mr *mr; + +	if (!pd->device->create_mr) +		return ERR_PTR(-ENOSYS); + +	mr = pd->device->create_mr(pd, mr_init_attr); + +	if (!IS_ERR(mr)) { +		mr->device  = pd->device; +		mr->pd      = pd; +		mr->uobject = NULL; +		atomic_inc(&pd->usecnt); +		atomic_set(&mr->usecnt, 0); +	} + +	return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ +	struct ib_pd *pd; +	int ret; + +	if (atomic_read(&mr->usecnt)) +		return -EBUSY; + +	pd = mr->pd; +	ret = mr->device->destroy_mr(mr); +	if (!ret) +		atomic_dec(&pd->usecnt); + +	return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); +  struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)  {  	struct ib_mr *mr; @@ -816,18 +1256,19 @@ 
EXPORT_SYMBOL(ib_free_fast_reg_page_list);  /* Memory windows */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)  {  	struct ib_mw *mw;  	if (!pd->device->alloc_mw)  		return ERR_PTR(-ENOSYS); -	mw = pd->device->alloc_mw(pd); +	mw = pd->device->alloc_mw(pd, type);  	if (!IS_ERR(mw)) {  		mw->device  = pd->device;  		mw->pd      = pd;  		mw->uobject = NULL; +		mw->type    = type;  		atomic_inc(&pd->usecnt);  	} @@ -901,22 +1342,106 @@ EXPORT_SYMBOL(ib_dealloc_fmr);  int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)  { +	int ret; +  	if (!qp->device->attach_mcast)  		return -ENOSYS;  	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)  		return -EINVAL; -	return qp->device->attach_mcast(qp, gid, lid); +	ret = qp->device->attach_mcast(qp, gid, lid); +	if (!ret) +		atomic_inc(&qp->usecnt); +	return ret;  }  EXPORT_SYMBOL(ib_attach_mcast);  int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)  { +	int ret; +  	if (!qp->device->detach_mcast)  		return -ENOSYS;  	if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)  		return -EINVAL; -	return qp->device->detach_mcast(qp, gid, lid); +	ret = qp->device->detach_mcast(qp, gid, lid); +	if (!ret) +		atomic_dec(&qp->usecnt); +	return ret;  }  EXPORT_SYMBOL(ib_detach_mcast); + +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device) +{ +	struct ib_xrcd *xrcd; + +	if (!device->alloc_xrcd) +		return ERR_PTR(-ENOSYS); + +	xrcd = device->alloc_xrcd(device, NULL, NULL); +	if (!IS_ERR(xrcd)) { +		xrcd->device = device; +		xrcd->inode = NULL; +		atomic_set(&xrcd->usecnt, 0); +		mutex_init(&xrcd->tgt_qp_mutex); +		INIT_LIST_HEAD(&xrcd->tgt_qp_list); +	} + +	return xrcd; +} +EXPORT_SYMBOL(ib_alloc_xrcd); + +int ib_dealloc_xrcd(struct ib_xrcd *xrcd) +{ +	struct ib_qp *qp; +	int ret; + +	if (atomic_read(&xrcd->usecnt)) +		return -EBUSY; + +	while (!list_empty(&xrcd->tgt_qp_list)) { +		qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list); +		ret = ib_destroy_qp(qp); +		if (ret) +			return ret; +	} + +	return xrcd->device->dealloc_xrcd(xrcd); +} +EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, +			       struct ib_flow_attr *flow_attr, +			       int domain) +{ +	struct ib_flow *flow_id; +	if (!qp->device->create_flow) +		return ERR_PTR(-ENOSYS); + +	flow_id = qp->device->create_flow(qp, flow_attr, domain); +	if (!IS_ERR(flow_id)) +		atomic_inc(&qp->usecnt); +	return flow_id; +} +EXPORT_SYMBOL(ib_create_flow); + +int ib_destroy_flow(struct ib_flow *flow_id) +{ +	int err; +	struct ib_qp *qp = flow_id->qp; + +	err = qp->device->destroy_flow(flow_id); +	if (!err) +		atomic_dec(&qp->usecnt); +	return err; +} +EXPORT_SYMBOL(ib_destroy_flow); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, +		       struct ib_mr_status *mr_status) +{ +	return mr->device->check_mr_status ? +		mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; +} +EXPORT_SYMBOL(ib_check_mr_status);  | 
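
For readers of the ib_uverbs_write() rework above: a minimal userspace sketch of how one extended command (for example IB_USER_VERBS_EX_CMD_CREATE_FLOW) is laid out on write(). It is not part of the patch. The two struct mirrors follow only the fields the hunk references (command, in_words, out_words, response, provider_in_words, provider_out_words, cmd_hdr_reserved); their exact layout, the flag value, and the helper name issue_ex_cmd() are assumptions to verify against include/uapi/rdma/ib_user_verbs.h.

	/*
	 * Illustrative only: buffer layout for one extended uverbs command,
	 * matching the checks in the reworked ib_uverbs_write() above.
	 */
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>

	struct uverbs_cmd_hdr {			/* mirrors struct ib_uverbs_cmd_hdr (assumed layout) */
		uint32_t command;		/* EX opcode OR'ed with the extended-command flag */
		uint16_t in_words;		/* ucore input after both headers, in 8-byte words */
		uint16_t out_words;		/* ucore response length, in 8-byte words */
	};

	struct uverbs_ex_cmd_hdr {		/* mirrors struct ib_uverbs_ex_cmd_hdr (assumed layout) */
		uint64_t response;		/* user pointer for the response, or 0 if none */
		uint16_t provider_in_words;	/* driver-private (uhw) input, in 8-byte words */
		uint16_t provider_out_words;	/* driver-private (uhw) output, in 8-byte words */
		uint32_t cmd_hdr_reserved;	/* must be 0, otherwise the kernel returns -EINVAL */
	};

	/* cmd_len/resp_len should be multiples of 8 so the word counts match the byte count the kernel checks. */
	static ssize_t issue_ex_cmd(int uverbs_fd, uint32_t command_with_ex_flag,
				    const void *cmd, size_t cmd_len,
				    void *resp, size_t resp_len)
	{
		char buf[4096];
		struct uverbs_cmd_hdr hdr = {
			.command   = command_with_ex_flag,
			.in_words  = (uint16_t)(cmd_len / 8),
			.out_words = (uint16_t)(resp_len / 8),
		};
		struct uverbs_ex_cmd_hdr ex_hdr = {
			.response	    = (uint64_t)(uintptr_t)resp,
			.provider_in_words  = 0,	/* no uhw payload in this sketch */
			.provider_out_words = 0,
			.cmd_hdr_reserved   = 0,
		};
		size_t total = sizeof(hdr) + sizeof(ex_hdr) + cmd_len;

		if (total > sizeof(buf))
			return -1;
		memcpy(buf, &hdr, sizeof(hdr));
		memcpy(buf + sizeof(hdr), &ex_hdr, sizeof(ex_hdr));
		memcpy(buf + sizeof(hdr) + sizeof(ex_hdr), cmd, cmd_len);

		/* On success the kernel returns the full number of bytes written. */
		return write(uverbs_fd, buf, total);
	}

Similarly, a kernel-side sketch of the XRC target-QP sharing added in verbs.c above: ib_open_qp() returns a lightweight handle onto an existing IB_QPT_XRC_TGT QP registered on the same XRCD. The wrapper name and arguments below are placeholders; the XRCD and target QP number are assumed to come from elsewhere.

	/*
	 * Illustrative only: open a shared handle onto an existing XRC
	 * target QP via the ib_open_qp() interface added above.
	 */
	#include <rdma/ib_verbs.h>

	static struct ib_qp *share_xrc_tgt_qp(struct ib_xrcd *xrcd, u32 tgt_qp_num,
					      void (*handler)(struct ib_event *, void *),
					      void *ctx)
	{
		struct ib_qp_open_attr open_attr = {
			.event_handler	= handler,
			.qp_context	= ctx,
			.qp_num		= tgt_qp_num,
			.qp_type	= IB_QPT_XRC_TGT,
		};

		/* Returns a lightweight ib_qp referencing the shared real_qp. */
		return ib_open_qp(xrcd, &open_attr);
	}

Each opened handle is released with ib_close_qp(); calling ib_destroy_qp() on such a handle goes through __ib_destroy_shared_qp(), which tears down the real QP only once its last user is gone.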
