Diffstat (limited to 'drivers/infiniband/core')
-rw-r--r--  drivers/infiniband/core/Makefile         7
-rw-r--r--  drivers/infiniband/core/addr.c          97
-rw-r--r--  drivers/infiniband/core/cm.c            40
-rw-r--r--  drivers/infiniband/core/cma.c          158
-rw-r--r--  drivers/infiniband/core/core_priv.h      2
-rw-r--r--  drivers/infiniband/core/iwcm.c          14
-rw-r--r--  drivers/infiniband/core/iwpm_msg.c     685
-rw-r--r--  drivers/infiniband/core/iwpm_util.c    607
-rw-r--r--  drivers/infiniband/core/iwpm_util.h    238
-rw-r--r--  drivers/infiniband/core/mad.c           14
-rw-r--r--  drivers/infiniband/core/netlink.c       20
-rw-r--r--  drivers/infiniband/core/sa_query.c      14
-rw-r--r--  drivers/infiniband/core/sysfs.c         87
-rw-r--r--  drivers/infiniband/core/ucma.c          22
-rw-r--r--  drivers/infiniband/core/umem.c         120
-rw-r--r--  drivers/infiniband/core/user_mad.c      75
-rw-r--r--  drivers/infiniband/core/uverbs.h        42
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c   124
-rw-r--r--  drivers/infiniband/core/uverbs_main.c  127
-rw-r--r--  drivers/infiniband/core/verbs.c        173
20 files changed, 2348 insertions, 318 deletions
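
The addr.c hunks below export two new helpers, rdma_addr_find_smac_by_sgid() and rdma_addr_find_dmac_by_grh(), which resolve the Ethernet L2 attributes (MAC and VLAN id) of a RoCE path from its GIDs; the cm.c and cma.c hunks then carry those attributes into the QP attributes. A minimal sketch of a hypothetical caller follows — example_resolve_l2() and its error handling are illustrative, only the two rdma_addr_* calls and their signatures come from this patch:

#include <rdma/ib_addr.h>

/*
 * Hypothetical caller (illustration only, not part of this patch):
 * resolve the Ethernet L2 attributes of a RoCE path from its
 * source/destination GIDs via the helpers exported from addr.c.
 */
static int example_resolve_l2(union ib_gid *sgid, union ib_gid *dgid,
			      u8 *smac, u8 *dmac, u16 *vlan_id)
{
	int ret;

	/* Source MAC/VLAN come from the local netdev that owns sgid. */
	ret = rdma_addr_find_smac_by_sgid(sgid, smac, vlan_id);
	if (ret)
		return ret;

	/*
	 * Destination MAC is resolved by ARP/neighbour discovery on the
	 * IP address encoded in dgid; per the patch, this blocks on a
	 * completion with a 1000 ms rdma_resolve_ip() timeout.
	 */
	return rdma_addr_find_dmac_by_grh(sgid, dgid, dmac, vlan_id);
}
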
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index c8bbaef1bec..ffd0af6734a 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,8 +1,9 @@ -infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o +infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_cm.o  user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_ucm.o  obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \ -					ib_cm.o iw_cm.o $(infiniband-y) +					ib_cm.o iw_cm.o ib_addr.o \ +					$(infiniband-y)  obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o  obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \  					$(user_access-y) @@ -17,7 +18,7 @@ ib_sa-y :=			sa_query.o multicast.o  ib_cm-y :=			cm.o -iw_cm-y :=			iwcm.o +iw_cm-y :=			iwcm.o iwpm_util.o iwpm_msg.o  rdma_cm-y :=			cma.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index e90f2b2eabd..8172d37f9ad 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)  }  EXPORT_SYMBOL(rdma_addr_size); +static struct rdma_addr_client self; +  void rdma_addr_register_client(struct rdma_addr_client *client)  {  	atomic_set(&client->refcount, 1); @@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,  }  EXPORT_SYMBOL(rdma_copy_addr); -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, +		      u16 *vlan_id)  {  	struct net_device *dev;  	int ret = -EADDRNOTAVAIL; @@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)  			return ret;  		ret = rdma_copy_addr(dev_addr, dev, NULL); +		if (vlan_id) +			*vlan_id = rdma_vlan_dev_vlan_id(dev);  		dev_put(dev);  		break; @@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)  					  &((struct sockaddr_in6 *) addr)->sin6_addr,  					  dev, 1)) {  				ret = rdma_copy_addr(dev_addr, dev, NULL); +				if (vlan_id) +					*vlan_id = rdma_vlan_dev_vlan_id(dev);  				break;  			}  		} @@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,  	src_in->sin_addr.s_addr = fl4.saddr;  	if (rt->dst.dev->flags & IFF_LOOPBACK) { -		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); +		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);  		if (!ret)  			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);  		goto put; @@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,  	}  	if (dst->dev->flags & IFF_LOOPBACK) { -		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); +		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);  		if (!ret)  			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);  		goto put; @@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)  }  EXPORT_SYMBOL(rdma_addr_cancel); +struct resolve_cb_context { +	struct rdma_dev_addr *addr; +	struct completion comp; +}; + +static void resolve_cb(int status, struct sockaddr *src_addr, +	     struct rdma_dev_addr *addr, void *context) +{ +	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct +				rdma_dev_addr)); +	complete(&((struct resolve_cb_context *)context)->comp); +} + +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, +			       u16 *vlan_id) +{ +	int ret = 0; +	struct rdma_dev_addr dev_addr; +	struct 
resolve_cb_context ctx; +	struct net_device *dev; + +	union { +		struct sockaddr     _sockaddr; +		struct sockaddr_in  _sockaddr_in; +		struct sockaddr_in6 _sockaddr_in6; +	} sgid_addr, dgid_addr; + + +	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); +	if (ret) +		return ret; + +	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); +	if (ret) +		return ret; + +	memset(&dev_addr, 0, sizeof(dev_addr)); + +	ctx.addr = &dev_addr; +	init_completion(&ctx.comp); +	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, +			&dev_addr, 1000, resolve_cb, &ctx); +	if (ret) +		return ret; + +	wait_for_completion(&ctx.comp); + +	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); +	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); +	if (!dev) +		return -ENODEV; +	if (vlan_id) +		*vlan_id = rdma_vlan_dev_vlan_id(dev); +	dev_put(dev); +	return ret; +} +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); + +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) +{ +	int ret = 0; +	struct rdma_dev_addr dev_addr; +	union { +		struct sockaddr     _sockaddr; +		struct sockaddr_in  _sockaddr_in; +		struct sockaddr_in6 _sockaddr_in6; +	} gid_addr; + +	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + +	if (ret) +		return ret; +	memset(&dev_addr, 0, sizeof(dev_addr)); +	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); +	if (ret) +		return ret; + +	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); +	return ret; +} +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); +  static int netevent_callback(struct notifier_block *self, unsigned long event,  	void *ctx)  { @@ -461,11 +550,13 @@ static int __init addr_init(void)  		return -ENOMEM;  	register_netevent_notifier(&nb); +	rdma_addr_register_client(&self);  	return 0;  }  static void __exit addr_cleanup(void)  { +	rdma_addr_unregister_client(&self);  	unregister_netevent_notifier(&nb);  	destroy_workqueue(addr_wq);  } diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 784b97cb05b..c3239170d8b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -47,6 +47,7 @@  #include <linux/sysfs.h>  #include <linux/workqueue.h>  #include <linux/kdev_t.h> +#include <linux/etherdevice.h>  #include <rdma/ib_cache.h>  #include <rdma/ib_cm.h> @@ -177,6 +178,8 @@ struct cm_av {  	struct ib_ah_attr ah_attr;  	u16 pkey_index;  	u8 timeout; +	u8  valid; +	u8  smac[ETH_ALEN];  };  struct cm_work { @@ -376,6 +379,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)  	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,  			     &av->ah_attr);  	av->timeout = path->packet_life_time + 1; +	memcpy(av->smac, path->smac, sizeof(av->smac)); + +	av->valid = 1;  	return 0;  } @@ -383,14 +389,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)  {  	unsigned long flags;  	int id; -	static int next_id;  	idr_preload(GFP_KERNEL);  	spin_lock_irqsave(&cm.lock, flags); -	id = idr_alloc(&cm.local_id_table, cm_id_priv, next_id, 0, GFP_NOWAIT); -	if (id >= 0) -		next_id = max(id + 1, 0); +	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);  	spin_unlock_irqrestore(&cm.lock, flags);  	idr_preload_end(); @@ -1557,6 +1560,9 @@ static int cm_req_handler(struct cm_work *work)  	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);  	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + +	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); +	work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;  	ret = 
cm_init_av_by_path(&work->path[0], &cm_id_priv->av);  	if (ret) {  		ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -3503,6 +3509,32 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,  		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |  				IB_QP_DEST_QPN | IB_QP_RQ_PSN;  		qp_attr->ah_attr = cm_id_priv->av.ah_attr; +		if (!cm_id_priv->av.valid) { +			spin_unlock_irqrestore(&cm_id_priv->lock, flags); +			return -EINVAL; +		} +		if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { +			qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; +			*qp_attr_mask |= IB_QP_VID; +		} +		if (!is_zero_ether_addr(cm_id_priv->av.smac)) { +			memcpy(qp_attr->smac, cm_id_priv->av.smac, +			       sizeof(qp_attr->smac)); +			*qp_attr_mask |= IB_QP_SMAC; +		} +		if (cm_id_priv->alt_av.valid) { +			if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { +				qp_attr->alt_vlan_id = +					cm_id_priv->alt_av.ah_attr.vlan_id; +				*qp_attr_mask |= IB_QP_ALT_VID; +			} +			if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { +				memcpy(qp_attr->alt_smac, +				       cm_id_priv->alt_av.smac, +				       sizeof(qp_attr->alt_smac)); +				*qp_attr_mask |= IB_QP_ALT_SMAC; +			} +		}  		qp_attr->path_mtu = cm_id_priv->path_mtu;  		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);  		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index dab4b41f171..d570030d899 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -328,28 +328,6 @@ static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey)  	return ret;  } -static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) -{ -	int i; -	int err; -	struct ib_port_attr props; -	union ib_gid tmp; - -	err = ib_query_port(device, port_num, &props); -	if (err) -		return err; - -	for (i = 0; i < props.gid_tbl_len; ++i) { -		err = ib_query_gid(device, port_num, i, &tmp); -		if (err) -			return err; -		if (!memcmp(&tmp, gid, sizeof tmp)) -			return 0; -	} - -	return -EADDRNOTAVAIL; -} -  static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)  {  	dev_addr->dev_type = ARPHRD_INFINIBAND; @@ -362,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a  	int ret;  	if (addr->sa_family != AF_IB) { -		ret = rdma_translate_ip(addr, dev_addr); +		ret = rdma_translate_ip(addr, dev_addr, NULL);  	} else {  		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);  		ret = 0; @@ -371,13 +349,14 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a  	return ret;  } -static int cma_acquire_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv, +			   struct rdma_id_private *listen_id_priv)  {  	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;  	struct cma_device *cma_dev;  	union ib_gid gid, iboe_gid;  	int ret = -ENODEV; -	u8 port; +	u8 port, found_port;  	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?  		
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; @@ -386,20 +365,44 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)  		return -EINVAL;  	mutex_lock(&lock); -	iboe_addr_get_sgid(dev_addr, &iboe_gid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &iboe_gid); +  	memcpy(&gid, dev_addr->src_dev_addr +  	       rdma_addr_gid_offset(dev_addr), sizeof gid); +	if (listen_id_priv && +	    rdma_port_get_link_layer(listen_id_priv->id.device, +				     listen_id_priv->id.port_num) == dev_ll) { +		cma_dev = listen_id_priv->cma_dev; +		port = listen_id_priv->id.port_num; +		if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && +		    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) +			ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, +						 &found_port, NULL); +		else +			ret = ib_find_cached_gid(cma_dev->device, &gid, +						 &found_port, NULL); + +		if (!ret && (port  == found_port)) { +			id_priv->id.port_num = found_port; +			goto out; +		} +	}  	list_for_each_entry(cma_dev, &dev_list, list) {  		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { +			if (listen_id_priv && +			    listen_id_priv->cma_dev == cma_dev && +			    listen_id_priv->id.port_num == port) +				continue;  			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {  				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&  				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) -					ret = find_gid_port(cma_dev->device, &iboe_gid, port); +					ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);  				else -					ret = find_gid_port(cma_dev->device, &gid, port); +					ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); -				if (!ret) { -					id_priv->id.port_num = port; +				if (!ret && (port == found_port)) { +					id_priv->id.port_num = found_port;  					goto out;  				}  			} @@ -602,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,  {  	struct ib_qp_attr qp_attr;  	int qp_attr_mask, ret; +	union ib_gid sgid;  	mutex_lock(&id_priv->qp_mutex);  	if (!id_priv->id.qp) { @@ -624,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,  	if (ret)  		goto out; +	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, +			   qp_attr.ah_attr.grh.sgid_index, &sgid); +	if (ret) +		goto out; + +	if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) +	    == RDMA_TRANSPORT_IB && +	    rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) +	    == IB_LINK_LAYER_ETHERNET) { +		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); + +		if (ret) +			goto out; +	}  	if (conn_param)  		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;  	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); @@ -724,6 +742,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,  		else  			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,  						 qp_attr_mask); +  		if (qp_attr->qp_state == IB_QPS_RTR)  			qp_attr->rq_psn = id_priv->seq_num;  		break; @@ -1292,7 +1311,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	}  	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); -	ret = cma_acquire_dev(conn_id); +	ret = cma_acquire_dev(conn_id, listen_id);  	if (ret)  		goto err2; @@ -1308,13 +1327,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)  	ret 
= conn_id->id.event_handler(&conn_id->id, &event);  	if (ret)  		goto err3; -  	/*  	 * Acquire mutex to prevent user executing rdma_destroy_id()  	 * while we're accessing the cm_id.  	 */  	mutex_lock(&lock); -	if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) +	if (cma_comp(conn_id, RDMA_CM_CONNECT) && +	    (conn_id->id.qp_type != IB_QPT_UD))  		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);  	mutex_unlock(&lock);  	mutex_unlock(&conn_id->handler_mutex); @@ -1451,7 +1470,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  {  	struct rdma_cm_id *new_cm_id;  	struct rdma_id_private *listen_id, *conn_id; -	struct net_device *dev = NULL;  	struct rdma_cm_event event;  	int ret;  	struct ib_device_attr attr; @@ -1474,14 +1492,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);  	conn_id->state = RDMA_CM_CONNECT; -	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr); +	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);  	if (ret) {  		mutex_unlock(&conn_id->handler_mutex);  		rdma_destroy_id(new_cm_id);  		goto out;  	} -	ret = cma_acquire_dev(conn_id); +	ret = cma_acquire_dev(conn_id, listen_id);  	if (ret) {  		mutex_unlock(&conn_id->handler_mutex);  		rdma_destroy_id(new_cm_id); @@ -1529,8 +1547,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,  	cma_deref_id(conn_id);  out: -	if (dev) -		dev_put(dev);  	mutex_unlock(&listen_id->handler_mutex);  	return ret;  } @@ -1848,6 +1864,26 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)  	return 0;  } +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ +	int prio; +	struct net_device *dev; + +	prio = rt_tos2priority(tos); +	dev = ndev->priv_flags & IFF_802_1Q_VLAN ? +		vlan_dev_real_dev(ndev) : ndev; + +	if (dev->num_tc) +		return netdev_get_prio_tc_map(dev, prio); + +#if IS_ENABLED(CONFIG_VLAN_8021Q) +	if (ndev->priv_flags & IFF_802_1Q_VLAN) +		return (vlan_dev_get_egress_qos_mask(ndev, prio) & +			VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +#endif +	return 0; +} +  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  {  	struct rdma_route *route = &id_priv->id.route; @@ -1855,7 +1891,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  	struct cma_work *work;  	int ret;  	struct net_device *ndev = NULL; -	u16 vid; +  	work = kzalloc(sizeof *work, GFP_KERNEL);  	if (!work) @@ -1879,20 +1915,20 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)  		goto err2;  	} -	vid = rdma_vlan_dev_vlan_id(ndev); +	route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); +	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); +	memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); -	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); -	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &route->path_rec->sgid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, +		    &route->path_rec->dgid);  	route->path_rec->hop_limit = 1;  	route->path_rec->reversible = 1;  	route->path_rec->pkey = cpu_to_be16(0xffff);  	route->path_rec->mtu_selector = IB_SA_EQ; -	route->path_rec->sl = netdev_get_prio_tc_map( -			ndev->priv_flags & IFF_802_1Q_VLAN ? 
-				vlan_dev_real_dev(ndev) : ndev, -			rt_tos2priority(id_priv->tos)); - +	route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);  	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);  	route->path_rec->rate_selector = IB_SA_EQ;  	route->path_rec->rate = iboe_get_rate(ndev); @@ -2049,8 +2085,9 @@ static void addr_handler(int status, struct sockaddr *src_addr,  			   RDMA_CM_ADDR_RESOLVED))  		goto out; +	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));  	if (!status && !id_priv->cma_dev) -		status = cma_acquire_dev(id_priv); +		status = cma_acquire_dev(id_priv, NULL);  	if (status) {  		if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, @@ -2058,10 +2095,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,  			goto out;  		event.event = RDMA_CM_EVENT_ADDR_ERROR;  		event.status = status; -	} else { -		memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); +	} else  		event.event = RDMA_CM_EVENT_ADDR_RESOLVED; -	}  	if (id_priv->id.event_handler(&id_priv->id, &event)) {  		cma_exch(id_priv, RDMA_CM_DESTROYING); @@ -2294,9 +2329,9 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv)  	int low, high, remaining;  	unsigned int rover; -	inet_get_local_port_range(&low, &high); +	inet_get_local_port_range(&init_net, &low, &high);  	remaining = (high - low) + 1; -	rover = net_random() % remaining + low; +	rover = prandom_u32() % remaining + low;  retry:  	if (last_used_port != rover &&  	    !idr_find(ps, (unsigned short) rover)) { @@ -2466,8 +2501,11 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,  		return 0;  	sin6 = (struct sockaddr_in6 *) addr; -	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && -	    !sin6->sin6_scope_id) + +	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) +		return 0; + +	if (!sin6->sin6_scope_id)  			return -EINVAL;  	dev_addr->bound_dev_if = sin6->sin6_scope_id; @@ -2542,17 +2580,17 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)  	if (ret)  		goto err1; +	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));  	if (!cma_any_addr(addr)) {  		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);  		if (ret)  			goto err1; -		ret = cma_acquire_dev(id_priv); +		ret = cma_acquire_dev(id_priv, NULL);  		if (ret)  			goto err1;  	} -	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));  	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {  		if (addr->sa_family == AF_INET)  			id_priv->afonly = 1; @@ -3281,7 +3319,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,  		err = -EINVAL;  		goto out2;  	} -	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); +	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, +		    &mc->multicast.ib->rec.port_gid);  	work->id = id_priv;  	work->mc = mc;  	INIT_WORK(&work->work, iboe_mcast_work_handler); @@ -3568,7 +3607,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)  			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,  						sizeof *id_stats, RDMA_NL_RDMA_CM, -						RDMA_NL_RDMA_CM_ID_STATS); +						RDMA_NL_RDMA_CM_ID_STATS, +						NLM_F_MULTI);  			if (!id_stats)  				goto out; diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a565af5c2d2..87d1936f5c1 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);  int  ib_cache_setup(void);  void ib_cache_cleanup(void); +int 
ib_resolve_eth_l2_attrs(struct ib_qp *qp, +			    struct ib_qp_attr *qp_attr, int *qp_attr_mask);  #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index c47c2034ca7..3d2e489ab73 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -181,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id)  static void rem_ref(struct iw_cm_id *cm_id)  {  	struct iwcm_id_private *cm_id_priv; +	int cb_destroy; +  	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); -	if (iwcm_deref_id(cm_id_priv) && -	    test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + +	/* +	 * Test bit before deref in case the cm_id gets freed on another +	 * thread. +	 */ +	cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); +	if (iwcm_deref_id(cm_id_priv) && cb_destroy) {  		BUG_ON(!list_empty(&cm_id_priv->work_list));  		free_cm_id(cm_id_priv);  	} @@ -327,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)  {  	struct iwcm_id_private *cm_id_priv;  	unsigned long flags; -	int ret;  	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);  	/* @@ -343,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)  		cm_id_priv->state = IW_CM_STATE_DESTROYING;  		spin_unlock_irqrestore(&cm_id_priv->lock, flags);  		/* destroy the listening endpoint */ -		ret = cm_id->device->iwcm->destroy_listen(cm_id); +		cm_id->device->iwcm->destroy_listen(cm_id);  		spin_lock_irqsave(&cm_id_priv->lock, flags);  		break;  	case IW_CM_STATE_ESTABLISHED: diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c new file mode 100644 index 00000000000..b85ddbc979e --- /dev/null +++ b/drivers/infiniband/core/iwpm_msg.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *        copyright notice, this list of conditions and the following + *        disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *        copyright notice, this list of conditions and the following + *        disclaimer in the documentation and/or other materials + *        provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "iwpm_util.h" + +static const char iwpm_ulib_name[] = "iWarpPortMapperUser"; +static int iwpm_ulib_version = 3; +static int iwpm_user_pid = IWPM_PID_UNDEFINED; +static atomic_t echo_nlmsg_seq; + +int iwpm_valid_pid(void) +{ +	return iwpm_user_pid > 0; +} +EXPORT_SYMBOL(iwpm_valid_pid); + +/* + * iwpm_register_pid - Send a netlink query to user space + *                     for the iwarp port mapper pid + * + * nlmsg attributes: + *	[IWPM_NLA_REG_PID_SEQ] + *	[IWPM_NLA_REG_IF_NAME] + *	[IWPM_NLA_REG_IBDEV_NAME] + *	[IWPM_NLA_REG_ULIB_NAME] + */ +int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto pid_query_error; +	} +	if (iwpm_registered_client(nl_client)) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto pid_query_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto pid_query_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); + +	/* fill in the pid request message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, +				pm_msg->if_name, IWPM_NLA_REG_IF_NAME); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, +				pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME); +	if (ret) +		goto pid_query_error; +	ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE, +				(char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME); +	if (ret) +		goto pid_query_error; + +	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", +		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); + +	ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_set_registered(nl_client, 1); +		iwpm_user_pid = IWPM_PID_UNAVAILABLE; +		err_str = "Unable to send a nlmsg"; +		goto pid_query_error; +	} +	nlmsg_request->req_buffer = pm_msg; +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +pid_query_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_register_pid); + +/* + * iwpm_add_mapping - Send a netlink add mapping message + *                    to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_MANAGE_MAPPING_SEQ] + *	[IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto add_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto add_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, 
nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto add_mapping_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto add_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); +	/* fill in the add mapping message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_MANAGE_MAPPING_SEQ); +	if (ret) +		goto add_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR); +	if (ret) +		goto add_mapping_error; +	nlmsg_request->req_buffer = pm_msg; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_user_pid = IWPM_PID_UNDEFINED; +		err_str = "Unable to send a nlmsg"; +		goto add_mapping_error; +	} +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +add_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_add_mapping); + +/* + * iwpm_add_and_query_mapping - Send a netlink add and query + *                              mapping message to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_QUERY_MAPPING_SEQ] + *	[IWPM_NLA_QUERY_LOCAL_ADDR] + *	[IWPM_NLA_QUERY_REMOTE_ADDR] + */ +int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto query_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto query_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	ret = -ENOMEM; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto query_mapping_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, +				nl_client, GFP_KERNEL); +	if (!nlmsg_request) { +		err_str = "Unable to allocate netlink request"; +		goto query_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); + +	/* fill in the query message */ +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_QUERY_MAPPING_SEQ); +	if (ret) +		goto query_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR); +	if (ret) +		goto query_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				&pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR); +	if (ret) +		goto query_mapping_error; +	nlmsg_request->req_buffer = pm_msg; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		err_str = "Unable to send a nlmsg"; +		goto query_mapping_error; +	} +	ret = iwpm_wait_complete_req(nlmsg_request); +	return ret; +query_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb(skb); +	if (nlmsg_request) +		
iwpm_free_nlmsg_request(&nlmsg_request->kref); +	return ret; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping); + +/* + * iwpm_remove_mapping - Send a netlink remove mapping message + *                       to the port mapper + * nlmsg attributes: + *	[IWPM_NLA_MANAGE_MAPPING_SEQ] + *	[IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) +{ +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	if (!iwpm_valid_client(nl_client)) { +		err_str = "Invalid port mapper client"; +		goto remove_mapping_error; +	} +	if (!iwpm_registered_client(nl_client)) { +		err_str = "Unregistered port mapper client"; +		goto remove_mapping_error; +	} +	if (!iwpm_valid_pid()) +		return 0; +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); +	if (!skb) { +		ret = -ENOMEM; +		err_str = "Unable to create a nlmsg"; +		goto remove_mapping_error; +	} +	msg_seq = atomic_read(&echo_nlmsg_seq); +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	err_str = "Unable to put attribute of the nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, +				IWPM_NLA_MANAGE_MAPPING_SEQ); +	if (ret) +		goto remove_mapping_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), +				local_addr, IWPM_NLA_MANAGE_ADDR); +	if (ret) +		goto remove_mapping_error; + +	ret = ibnl_unicast(skb, nlh, iwpm_user_pid); +	if (ret) { +		skb = NULL; /* skb is freed in the netlink send-op handling */ +		iwpm_user_pid = IWPM_PID_UNDEFINED; +		err_str = "Unable to send a nlmsg"; +		goto remove_mapping_error; +	} +	iwpm_print_sockaddr(local_addr, +			"remove_mapping: Local sockaddr:"); +	return 0; +remove_mapping_error: +	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); +	if (skb) +		dev_kfree_skb_any(skb); +	return ret; +} +EXPORT_SYMBOL(iwpm_remove_mapping); + +/* netlink attribute policy for the received response to register pid request */ +static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { +	[IWPM_NLA_RREG_PID_SEQ]     = { .type = NLA_U32 }, +	[IWPM_NLA_RREG_IBDEV_NAME]  = { .type = NLA_STRING, +					.len = IWPM_DEVNAME_SIZE - 1 }, +	[IWPM_NLA_RREG_ULIB_NAME]   = { .type = NLA_STRING, +					.len = IWPM_ULIBNAME_SIZE - 1 }, +	[IWPM_NLA_RREG_ULIB_VER]    = { .type = NLA_U16 }, +	[IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 } +}; + +/* + * iwpm_register_pid_cb - Process a port mapper response to + *                        iwpm_register_pid() + */ +int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX]; +	struct iwpm_dev_data *pm_msg; +	char *dev_name, *iwpm_name; +	u32 msg_seq; +	u8 nl_client; +	u16 iwpm_version; +	const char *msg_type = "Register Pid response"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX, +				resp_reg_policy, nltb, msg_type)) +		return -EINVAL; + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +		return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	nl_client = nlmsg_request->nl_client; +	dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]); +	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]); +	iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]); + +	/* check device name, ulib name and version */ +	if 
(strcmp(pm_msg->dev_name, dev_name) || +			strcmp(iwpm_ulib_name, iwpm_name) || +			iwpm_version != iwpm_ulib_version) { + +		pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n", +				__func__, dev_name, iwpm_name, iwpm_version); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto register_pid_response_exit; +	} +	iwpm_user_pid = cb->nlh->nlmsg_pid; +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", +			__func__, iwpm_user_pid); +	if (iwpm_valid_client(nl_client)) +		iwpm_set_registered(nl_client, 1); +register_pid_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found nlmsg_request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_register_pid_cb); + +/* netlink attribute policy for the received response to add mapping request */ +static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { +	[IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 }, +	[IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RMANAGE_MAPPING_ERR]	  = { .type = NLA_U16 } +}; + +/* + * iwpm_add_mapping_cb - Process a port mapper response to + *                       iwpm_add_mapping() + */ +int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_sa_data *pm_msg; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX]; +	struct sockaddr_storage *local_sockaddr; +	struct sockaddr_storage *mapped_sockaddr; +	const char *msg_type; +	u32 msg_seq; + +	msg_type = "Add Mapping response"; +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX, +				resp_add_policy, nltb, msg_type)) +		return -EINVAL; + +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +		return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	local_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_MANAGE_ADDR]); +	mapped_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]); + +	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) { +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto add_mapping_response_exit; +	} +	if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) { +		pr_info("%s: Sockaddr family doesn't match the requested one\n", +				__func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto add_mapping_response_exit; +	} +	memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr, +			sizeof(*mapped_sockaddr)); +	iwpm_print_sockaddr(&pm_msg->loc_addr, +			"add_mapping: Local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, +			"add_mapping: Mapped local sockaddr:"); + +add_mapping_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_add_mapping_cb); + +/* netlink attribute policy for the response to add and query mapping request */ +static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { +	[IWPM_NLA_QUERY_MAPPING_SEQ]   
   = { .type = NLA_U32 }, +	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) }, +	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 } +}; + +/* + * iwpm_add_and_query_mapping_cb - Process a port mapper response to + *                                 iwpm_add_and_query_mapping() + */ +int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, +				struct netlink_callback *cb) +{ +	struct iwpm_sa_data *pm_msg; +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; +	struct sockaddr_storage *local_sockaddr, *remote_sockaddr; +	struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; +	const char *msg_type; +	u32 msg_seq; +	u16 err_code; + +	msg_type = "Query Mapping response"; +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, +				resp_query_policy, nltb, msg_type)) +		return -EINVAL; +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]); +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		pr_info("%s: Could not find a matching request (seq = %u)\n", +				 __func__, msg_seq); +			return -EINVAL; +	} +	pm_msg = nlmsg_request->req_buffer; +	local_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); +	remote_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); +	mapped_loc_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); +	mapped_rem_sockaddr = (struct sockaddr_storage *) +			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + +	err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]); +	if (err_code == IWPM_REMOTE_QUERY_REJECT) { +		pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n", +			__func__, cb->nlh->nlmsg_pid, msg_seq); +		nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT; +	} +	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) || +		iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) { +		pr_info("%s: Incorrect local sockaddr\n", __func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto query_mapping_response_exit; +	} +	if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || +		mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { +		pr_info("%s: Sockaddr family doesn't match the requested one\n", +				__func__); +		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; +		goto query_mapping_response_exit; +	} +	memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr, +			sizeof(*mapped_loc_sockaddr)); +	memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr, +			sizeof(*mapped_rem_sockaddr)); + +	iwpm_print_sockaddr(&pm_msg->loc_addr, +			"query_mapping: Local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, +			"query_mapping: Mapped local sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->rem_addr, +			"query_mapping: Remote sockaddr:"); +	iwpm_print_sockaddr(&pm_msg->mapped_rem_addr, +			"query_mapping: Mapped remote sockaddr:"); +query_mapping_response_exit: +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); + +/* netlink 
attribute policy for the received request for mapping info */ +static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { +	[IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, +					.len = IWPM_ULIBNAME_SIZE - 1 }, +	[IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 } +}; + +/* + * iwpm_mapping_info_cb - Process a port mapper request for mapping info + */ +int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; +	const char *msg_type = "Mapping Info response"; +	int iwpm_pid; +	u8 nl_client; +	char *iwpm_name; +	u16 iwpm_version; +	int ret = -EINVAL; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX, +				resp_mapinfo_policy, nltb, msg_type)) { +		pr_info("%s: Unable to parse nlmsg\n", __func__); +		return ret; +	} +	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]); +	iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]); +	if (strcmp(iwpm_ulib_name, iwpm_name) || +			iwpm_version != iwpm_ulib_version) { +		pr_info("%s: Invalid port mapper name = %s version = %d\n", +				__func__, iwpm_name, iwpm_version); +		return ret; +	} +	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); +	if (!iwpm_valid_client(nl_client)) { +		pr_info("%s: Invalid port mapper client = %d\n", +				__func__, nl_client); +		return ret; +	} +	iwpm_set_registered(nl_client, 0); +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	if (!iwpm_mapinfo_available()) +		return 0; +	iwpm_pid = cb->nlh->nlmsg_pid; +	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", +		 __func__, iwpm_pid); +	ret = iwpm_send_mapinfo(nl_client, iwpm_pid); +	return ret; +} +EXPORT_SYMBOL(iwpm_mapping_info_cb); + +/* netlink attribute policy for the received mapping info ack */ +static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { +	[IWPM_NLA_MAPINFO_SEQ]    =   { .type = NLA_U32 }, +	[IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 }, +	[IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 } +}; + +/* + * iwpm_ack_mapping_info_cb - Process a port mapper ack for + *                            the provided mapping info records + */ +int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX]; +	u32 mapinfo_send, mapinfo_ack; +	const char *msg_type = "Mapping Info Ack"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX, +				ack_mapinfo_policy, nltb, msg_type)) +		return -EINVAL; +	mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]); +	mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]); +	if (mapinfo_ack != mapinfo_send) +		pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n", +			__func__, mapinfo_send, mapinfo_ack); +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	return 0; +} +EXPORT_SYMBOL(iwpm_ack_mapping_info_cb); + +/* netlink attribute policy for the received port mapper error message */ +static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { +	[IWPM_NLA_ERR_SEQ]        = { .type = NLA_U32 }, +	[IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 }, +}; + +/* + * iwpm_mapping_error_cb - Process a port mapper error message + */ +int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); +	struct nlattr *nltb[IWPM_NLA_ERR_MAX]; +	u32 msg_seq; +	u16 err_code; +	const char *msg_type = "Mapping Error Msg"; + +	if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX, +				
map_error_policy, nltb, msg_type)) +		return -EINVAL; + +	msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]); +	err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]); +	pr_info("%s: Received msg seq = %u err code = %u client = %d\n", +				__func__, msg_seq, err_code, nl_client); +	/* look for nlmsg_request */ +	nlmsg_request = iwpm_find_nlmsg_request(msg_seq); +	if (!nlmsg_request) { +		/* not all errors have associated requests */ +		pr_debug("Could not find matching req (seq = %u)\n", msg_seq); +		return 0; +	} +	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); +	nlmsg_request->err_code = err_code; +	nlmsg_request->request_done = 1; +	/* always for found request */ +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	barrier(); +	wake_up(&nlmsg_request->waitq); +	return 0; +} +EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c new file mode 100644 index 00000000000..69e9f84c160 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer in the documentation and/or other materials + *	  provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "iwpm_util.h" + +#define IWPM_HASH_BUCKET_SIZE	512 +#define IWPM_HASH_BUCKET_MASK	(IWPM_HASH_BUCKET_SIZE - 1) + +static LIST_HEAD(iwpm_nlmsg_req_list); +static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); + +static struct hlist_head *iwpm_hash_bucket; +static DEFINE_SPINLOCK(iwpm_mapinfo_lock); + +static DEFINE_MUTEX(iwpm_admin_lock); +static struct iwpm_admin_data iwpm_admin; + +int iwpm_init(u8 nl_client) +{ +	if (iwpm_valid_client(nl_client)) +		return -EINVAL; +	mutex_lock(&iwpm_admin_lock); +	if (atomic_read(&iwpm_admin.refcount) == 0) { +		iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * +					sizeof(struct hlist_head), GFP_KERNEL); +		if (!iwpm_hash_bucket) { +			mutex_unlock(&iwpm_admin_lock); +			pr_err("%s Unable to create mapinfo hash table\n", __func__); +			return -ENOMEM; +		} +	} +	atomic_inc(&iwpm_admin.refcount); +	mutex_unlock(&iwpm_admin_lock); +	iwpm_set_valid(nl_client, 1); +	return 0; +} +EXPORT_SYMBOL(iwpm_init); + +static void free_hash_bucket(void); + +int iwpm_exit(u8 nl_client) +{ + +	if (!iwpm_valid_client(nl_client)) +		return -EINVAL; +	mutex_lock(&iwpm_admin_lock); +	if (atomic_read(&iwpm_admin.refcount) == 0) { +		mutex_unlock(&iwpm_admin_lock); +		pr_err("%s Incorrect usage - negative refcount\n", __func__); +		return -EINVAL; +	} +	if (atomic_dec_and_test(&iwpm_admin.refcount)) { +		free_hash_bucket(); +		pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); +	} +	mutex_unlock(&iwpm_admin_lock); +	iwpm_set_valid(nl_client, 0); +	return 0; +} +EXPORT_SYMBOL(iwpm_exit); + +static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, +					       struct sockaddr_storage *); + +int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, +			struct sockaddr_storage *mapped_sockaddr, +			u8 nl_client) +{ +	struct hlist_head *hash_bucket_head; +	struct iwpm_mapping_info *map_info; +	unsigned long flags; + +	if (!iwpm_valid_client(nl_client)) +		return -EINVAL; +	map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); +	if (!map_info) { +		pr_err("%s: Unable to allocate a mapping info\n", __func__); +		return -ENOMEM; +	} +	memcpy(&map_info->local_sockaddr, local_sockaddr, +	       sizeof(struct sockaddr_storage)); +	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, +	       sizeof(struct sockaddr_storage)); +	map_info->nl_client = nl_client; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		hash_bucket_head = get_hash_bucket_head( +					&map_info->local_sockaddr, +					&map_info->mapped_sockaddr); +		hlist_add_head(&map_info->hlist_node, hash_bucket_head); +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return 0; +} +EXPORT_SYMBOL(iwpm_create_mapinfo); + +int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, +			struct sockaddr_storage *mapped_local_addr) +{ +	struct hlist_node *tmp_hlist_node; +	struct hlist_head *hash_bucket_head; +	struct iwpm_mapping_info *map_info = NULL; +	unsigned long flags; +	int ret = -EINVAL; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		hash_bucket_head = get_hash_bucket_head( +					local_sockaddr, +					mapped_local_addr); +		hlist_for_each_entry_safe(map_info, tmp_hlist_node, +					hash_bucket_head, hlist_node) { + +			if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr, +						mapped_local_addr)) { + +				hlist_del_init(&map_info->hlist_node); +				kfree(map_info); +				ret = 0; +				break; +			} +		} +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return ret; +} 
+EXPORT_SYMBOL(iwpm_remove_mapinfo); + +static void free_hash_bucket(void) +{ +	struct hlist_node *tmp_hlist_node; +	struct iwpm_mapping_info *map_info; +	unsigned long flags; +	int i; + +	/* remove all the mapinfo data from the list */ +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +		hlist_for_each_entry_safe(map_info, tmp_hlist_node, +			&iwpm_hash_bucket[i], hlist_node) { + +				hlist_del_init(&map_info->hlist_node); +				kfree(map_info); +			} +	} +	/* free the hash list */ +	kfree(iwpm_hash_bucket); +	iwpm_hash_bucket = NULL; +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +} + +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, +					u8 nl_client, gfp_t gfp) +{ +	struct iwpm_nlmsg_request *nlmsg_request = NULL; +	unsigned long flags; + +	nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); +	if (!nlmsg_request) { +		pr_err("%s Unable to allocate a nlmsg_request\n", __func__); +		return NULL; +	} +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + +	kref_init(&nlmsg_request->kref); +	kref_get(&nlmsg_request->kref); +	nlmsg_request->nlmsg_seq = nlmsg_seq; +	nlmsg_request->nl_client = nl_client; +	nlmsg_request->request_done = 0; +	nlmsg_request->err_code = 0; +	return nlmsg_request; +} + +void iwpm_free_nlmsg_request(struct kref *kref) +{ +	struct iwpm_nlmsg_request *nlmsg_request; +	unsigned long flags; + +	nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref); + +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_del_init(&nlmsg_request->inprocess_list); +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + +	if (!nlmsg_request->request_done) +		pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n", +			__func__, nlmsg_request->nlmsg_seq); +	kfree(nlmsg_request); +} + +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) +{ +	struct iwpm_nlmsg_request *nlmsg_request; +	struct iwpm_nlmsg_request *found_request = NULL; +	unsigned long flags; + +	spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); +	list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list, +			    inprocess_list) { +		if (nlmsg_request->nlmsg_seq == echo_seq) { +			found_request = nlmsg_request; +			kref_get(&nlmsg_request->kref); +			break; +		} +	} +	spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); +	return found_request; +} + +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) +{ +	int ret; +	init_waitqueue_head(&nlmsg_request->waitq); + +	ret = wait_event_timeout(nlmsg_request->waitq, +			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); +	if (!ret) { +		ret = -EINVAL; +		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", +			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); +	} else { +		ret = nlmsg_request->err_code; +	} +	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); +	return ret; +} + +int iwpm_get_nlmsg_seq(void) +{ +	return atomic_inc_return(&iwpm_admin.nlmsg_seq); +} + +int iwpm_valid_client(u8 nl_client) +{ +	if (nl_client >= RDMA_NL_NUM_CLIENTS) +		return 0; +	return iwpm_admin.client_list[nl_client]; +} + +void iwpm_set_valid(u8 nl_client, int valid) +{ +	if (nl_client >= RDMA_NL_NUM_CLIENTS) +		return; +	iwpm_admin.client_list[nl_client] = valid; +} + +/* valid client */ +int iwpm_registered_client(u8 nl_client) +{ +	return iwpm_admin.reg_list[nl_client]; +} + +/* valid client */ +void 
iwpm_set_registered(u8 nl_client, int reg) +{ +	iwpm_admin.reg_list[nl_client] = reg; +} + +int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, +				struct sockaddr_storage *b_sockaddr) +{ +	if (a_sockaddr->ss_family != b_sockaddr->ss_family) +		return 1; +	if (a_sockaddr->ss_family == AF_INET) { +		struct sockaddr_in *a4_sockaddr = +			(struct sockaddr_in *)a_sockaddr; +		struct sockaddr_in *b4_sockaddr = +			(struct sockaddr_in *)b_sockaddr; +		if (!memcmp(&a4_sockaddr->sin_addr, +			&b4_sockaddr->sin_addr, sizeof(struct in_addr)) +			&& a4_sockaddr->sin_port == b4_sockaddr->sin_port) +				return 0; + +	} else if (a_sockaddr->ss_family == AF_INET6) { +		struct sockaddr_in6 *a6_sockaddr = +			(struct sockaddr_in6 *)a_sockaddr; +		struct sockaddr_in6 *b6_sockaddr = +			(struct sockaddr_in6 *)b_sockaddr; +		if (!memcmp(&a6_sockaddr->sin6_addr, +			&b6_sockaddr->sin6_addr, sizeof(struct in6_addr)) +			&& a6_sockaddr->sin6_port == b6_sockaddr->sin6_port) +				return 0; + +	} else { +		pr_err("%s: Invalid sockaddr family\n", __func__); +	} +	return 1; +} + +struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, +						int nl_client) +{ +	struct sk_buff *skb = NULL; + +	skb = dev_alloc_skb(NLMSG_GOODSIZE); +	if (!skb) { +		pr_err("%s Unable to allocate skb\n", __func__); +		goto create_nlmsg_exit; +	} +	if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, +			   NLM_F_REQUEST))) { +		pr_warn("%s: Unable to put the nlmsg header\n", __func__); +		dev_kfree_skb(skb); +		skb = NULL; +	} +create_nlmsg_exit: +	return skb; +} + +int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, +				   const struct nla_policy *nlmsg_policy, +				   struct nlattr *nltb[], const char *msg_type) +{ +	int nlh_len = 0; +	int ret; +	const char *err_str = ""; + +	ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy); +	if (ret) { +		err_str = "Invalid attribute"; +		goto parse_nlmsg_error; +	} +	ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy); +	if (ret) { +		err_str = "Unable to parse the nlmsg"; +		goto parse_nlmsg_error; +	} +	ret = iwpm_validate_nlmsg_attr(nltb, policy_max); +	if (ret) { +		err_str = "Invalid NULL attribute"; +		goto parse_nlmsg_error; +	} +	return 0; +parse_nlmsg_error: +	pr_warn("%s: %s (msg type %s ret = %d)\n", +			__func__, err_str, msg_type, ret); +	return ret; +} + +void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg) +{ +	struct sockaddr_in6 *sockaddr_v6; +	struct sockaddr_in *sockaddr_v4; + +	switch (sockaddr->ss_family) { +	case AF_INET: +		sockaddr_v4 = (struct sockaddr_in *)sockaddr; +		pr_debug("%s IPV4 %pI4: %u(0x%04X)\n", +			msg, &sockaddr_v4->sin_addr, +			ntohs(sockaddr_v4->sin_port), +			ntohs(sockaddr_v4->sin_port)); +		break; +	case AF_INET6: +		sockaddr_v6 = (struct sockaddr_in6 *)sockaddr; +		pr_debug("%s IPV6 %pI6: %u(0x%04X)\n", +			msg, &sockaddr_v6->sin6_addr, +			ntohs(sockaddr_v6->sin6_port), +			ntohs(sockaddr_v6->sin6_port)); +		break; +	default: +		break; +	} +} + +static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr) +{ +	u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0); +	u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0); +	return hash; +} + +static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) +{ +	u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0); +	u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0); +	return hash; +} + +static struct hlist_head 
*get_hash_bucket_head(struct sockaddr_storage +					       *local_sockaddr, +					       struct sockaddr_storage +					       *mapped_sockaddr) +{ +	u32 local_hash, mapped_hash, hash; + +	if (local_sockaddr->ss_family == AF_INET) { +		local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); +		mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + +	} else if (local_sockaddr->ss_family == AF_INET6) { +		local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); +		mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); +	} else { +		pr_err("%s: Invalid sockaddr family\n", __func__); +		return NULL; +	} + +	if (local_hash == mapped_hash) /* if port mapper isn't available */ +		hash = local_hash; +	else +		hash = jhash_2words(local_hash, mapped_hash, 0); + +	return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; +} + +static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) +{ +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	u32 msg_seq; +	const char *err_str = ""; +	int ret = -EINVAL; + +	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client); +	if (!skb) { +		err_str = "Unable to create a nlmsg"; +		goto mapinfo_num_error; +	} +	nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); +	msg_seq = 0; +	err_str = "Unable to put attribute of mapinfo number nlmsg"; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ); +	if (ret) +		goto mapinfo_num_error; +	ret = ibnl_put_attr(skb, nlh, sizeof(u32), +				&mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); +	if (ret) +		goto mapinfo_num_error; +	ret = ibnl_unicast(skb, nlh, iwpm_pid); +	if (ret) { +		skb = NULL; +		err_str = "Unable to send a nlmsg"; +		goto mapinfo_num_error; +	} +	pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num); +	return 0; +mapinfo_num_error: +	pr_info("%s: %s\n", __func__, err_str); +	if (skb) +		dev_kfree_skb(skb); +	return ret; +} + +static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) +{ +	struct nlmsghdr *nlh = NULL; +	int ret = 0; + +	if (!skb) +		return ret; +	if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, +			   RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { +		pr_warn("%s Unable to put NLMSG_DONE\n", __func__); +		return -ENOMEM; +	} +	nlh->nlmsg_type = NLMSG_DONE; +	ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid); +	if (ret) +		pr_warn("%s Unable to send a nlmsg\n", __func__); +	return ret; +} + +int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) +{ +	struct iwpm_mapping_info *map_info; +	struct sk_buff *skb = NULL; +	struct nlmsghdr *nlh; +	int skb_num = 0, mapping_num = 0; +	int i = 0, nlmsg_bytes = 0; +	unsigned long flags; +	const char *err_str = ""; +	int ret; + +	skb = dev_alloc_skb(NLMSG_GOODSIZE); +	if (!skb) { +		ret = -ENOMEM; +		err_str = "Unable to allocate skb"; +		goto send_mapping_info_exit; +	} +	skb_num++; +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +		hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], +				     hlist_node) { +			if (map_info->nl_client != nl_client) +				continue; +			nlh = NULL; +			if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, +					RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { +				ret = -ENOMEM; +				err_str = "Unable to put the nlmsg header"; +				goto send_mapping_info_unlock; +			} +			err_str = "Unable to put attribute of the nlmsg"; +			ret = ibnl_put_attr(skb, nlh, +					sizeof(struct sockaddr_storage), +					&map_info->local_sockaddr, +					IWPM_NLA_MAPINFO_LOCAL_ADDR); +			if (ret) +				
goto send_mapping_info_unlock; + +			ret = ibnl_put_attr(skb, nlh, +					sizeof(struct sockaddr_storage), +					&map_info->mapped_sockaddr, +					IWPM_NLA_MAPINFO_MAPPED_ADDR); +			if (ret) +				goto send_mapping_info_unlock; + +			iwpm_print_sockaddr(&map_info->local_sockaddr, +				"send_mapping_info: Local sockaddr:"); +			iwpm_print_sockaddr(&map_info->mapped_sockaddr, +				"send_mapping_info: Mapped local sockaddr:"); +			mapping_num++; +			nlmsg_bytes += nlh->nlmsg_len; + +			/* check if all mappings can fit in one skb */ +			if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) { +				/* and leave room for NLMSG_DONE */ +				nlmsg_bytes = 0; +				skb_num++; +				spin_unlock_irqrestore(&iwpm_mapinfo_lock, +						       flags); +				/* send the skb */ +				ret = send_nlmsg_done(skb, nl_client, iwpm_pid); +				skb = NULL; +				if (ret) { +					err_str = "Unable to send map info"; +					goto send_mapping_info_exit; +				} +				if (skb_num == IWPM_MAPINFO_SKB_COUNT) { +					ret = -ENOMEM; +					err_str = "Insufficient skbs for map info"; +					goto send_mapping_info_exit; +				} +				skb = dev_alloc_skb(NLMSG_GOODSIZE); +				if (!skb) { +					ret = -ENOMEM; +					err_str = "Unable to allocate skb"; +					goto send_mapping_info_exit; +				} +				spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +			} +		} +	} +send_mapping_info_unlock: +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +send_mapping_info_exit: +	if (ret) { +		pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret); +		if (skb) +			dev_kfree_skb(skb); +		return ret; +	} +	send_nlmsg_done(skb, nl_client, iwpm_pid); +	return send_mapinfo_num(mapping_num, nl_client, iwpm_pid); +} + +int iwpm_mapinfo_available(void) +{ +	unsigned long flags; +	int full_bucket = 0, i = 0; + +	spin_lock_irqsave(&iwpm_mapinfo_lock, flags); +	if (iwpm_hash_bucket) { +		for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { +			if (!hlist_empty(&iwpm_hash_bucket[i])) { +				full_bucket = 1; +				break; +			} +		} +	} +	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +	return full_bucket; +} diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h new file mode 100644 index 00000000000..9777c869a14 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses.  You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + *     Redistribution and use in source and binary forms, with or + *     without modification, are permitted provided that the following + *     conditions are met: + * + *      - Redistributions of source code must retain the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer. + * + *      - Redistributions in binary form must reproduce the above + *	  copyright notice, this list of conditions and the following + *	  disclaimer in the documentation and/or other materials + *	  provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _IWPM_UTIL_H +#define _IWPM_UTIL_H + +#include <linux/module.h> +#include <linux/io.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/delay.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/jhash.h> +#include <linux/kref.h> +#include <net/netlink.h> +#include <linux/errno.h> +#include <rdma/iw_portmap.h> +#include <rdma/rdma_netlink.h> + + +#define IWPM_NL_RETRANS		3 +#define IWPM_NL_TIMEOUT		(10*HZ) +#define IWPM_MAPINFO_SKB_COUNT	20 + +#define IWPM_PID_UNDEFINED     -1 +#define IWPM_PID_UNAVAILABLE   -2 + +struct iwpm_nlmsg_request { +	struct list_head    inprocess_list; +	__u32               nlmsg_seq; +	void                *req_buffer; +	u8	            nl_client; +	u8                  request_done; +	u16                 err_code; +	wait_queue_head_t   waitq; +	struct kref         kref; +}; + +struct iwpm_mapping_info { +	struct hlist_node hlist_node; +	struct sockaddr_storage local_sockaddr; +	struct sockaddr_storage mapped_sockaddr; +	u8     nl_client; +}; + +struct iwpm_admin_data { +	atomic_t refcount; +	atomic_t nlmsg_seq; +	int      client_list[RDMA_NL_NUM_CLIENTS]; +	int      reg_list[RDMA_NL_NUM_CLIENTS]; +}; + +/** + * iwpm_get_nlmsg_request - Allocate and initialize netlink message request + * @nlmsg_seq: Sequence number of the netlink message + * @nl_client: The index of the netlink client + * @gfp: Indicates how the memory for the request should be allocated + * + * Returns the newly allocated netlink request object if successful, + * otherwise returns NULL + */ +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, +						u8 nl_client, gfp_t gfp); + +/** + * iwpm_free_nlmsg_request - Deallocate netlink message request + * @kref: Holds reference of netlink message request + */ +void iwpm_free_nlmsg_request(struct kref *kref); + +/** + * iwpm_find_nlmsg_request - Find netlink message request in the request list + * @echo_seq: Sequence number of the netlink request to find + * + * Returns the found netlink message request, + * if not found, returns NULL + */ +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq); + +/** + * iwpm_wait_complete_req - Block while servicing the netlink request + * @nlmsg_request: Netlink message request to service + * + * Wakes up, after the request is completed or expired + * Returns 0 if the request is complete without error + */ +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); + +/** + * iwpm_get_nlmsg_seq - Get the sequence number for a netlink + *			message to send to the port mapper + * + * Returns the sequence number for the netlink message. 
+ */ +int iwpm_get_nlmsg_seq(void); + +/** + * iwpm_valid_client - Check if the port mapper client is valid + * @nl_client: The index of the netlink client + * + * Valid clients need to call iwpm_init() before using + * the port mapper + */ +int iwpm_valid_client(u8 nl_client); + +/** + * iwpm_set_valid - Set the port mapper client to valid or not + * @nl_client: The index of the netlink client + * @valid: 1 if valid or 0 if invalid + */ +void iwpm_set_valid(u8 nl_client, int valid); + +/** + * iwpm_registered_client - Check if the port mapper client is registered + * @nl_client: The index of the netlink client + * + * Call iwpm_register_pid() to register a client + */ +int iwpm_registered_client(u8 nl_client); + +/** + * iwpm_set_registered - Set the port mapper client to registered or not + * @nl_client: The index of the netlink client + * @reg: 1 if registered or 0 if not + */ +void iwpm_set_registered(u8 nl_client, int reg); + +/** + * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of + *                     a client to the user space port mapper + * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper + * + * If successful, returns the number of sent mapping info records + */ +int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid); + +/** + * iwpm_mapinfo_available - Check if any mapping info records is available + *		            in the hash table + * + * Returns 1 if mapping information is available, otherwise returns 0 + */ +int iwpm_mapinfo_available(void); + +/** + * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * + * Returns 0 if they are holding the same ip/tcp address info, + * otherwise returns 1 + */ +int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, +			struct sockaddr_storage *b_sockaddr); + +/** + * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes + * @nltb: Holds address of each netlink message attributes + * @nla_count: Number of netlink message attributes + * + * Returns error if any of the nla_count attributes is NULL + */ +static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[], +					   int nla_count) +{ +	int i; +	for (i = 1; i < nla_count; i++) { +		if (!nltb[i]) +			return -EINVAL; +	} +	return 0; +} + +/** + * iwpm_create_nlmsg - Allocate skb and form a netlink message + * @nl_op: Netlink message opcode + * @nlh: Holds address of the netlink message header in skb + * @nl_client: The index of the netlink client + * + * Returns the newly allcated skb, or NULL if the tailroom of the skb + * is insufficient to store the message header and payload + */ +struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, +					int nl_client); + +/** + * iwpm_parse_nlmsg - Validate and parse the received netlink message + * @cb: Netlink callback structure + * @policy_max: Maximum attribute type to be expected + * @nlmsg_policy: Validation policy + * @nltb: Array to store policy_max parsed elements + * @msg_type: Type of netlink message + * + * Returns 0 on success or a negative error code + */ +int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, +				const struct nla_policy *nlmsg_policy, +				struct nlattr *nltb[], const char *msg_type); + +/** + * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port + * @sockaddr: Socket address to print + * @msg: Message to print + */ +void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); +#endif diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 
4c837e66516..ab31f136d04 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)  					mad_send_wr->send_buf.mad,  					sge[0].length,  					DMA_TO_DEVICE); +	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) +		return -ENOMEM; +  	mad_send_wr->header_mapping = sge[0].addr;  	sge[1].addr = ib_dma_map_single(mad_agent->device,  					ib_get_payload(mad_send_wr),  					sge[1].length,  					DMA_TO_DEVICE); +	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { +		ib_dma_unmap_single(mad_agent->device, +				    mad_send_wr->header_mapping, +				    sge[0].length, DMA_TO_DEVICE); +		return -ENOMEM; +	}  	mad_send_wr->payload_mapping = sge[1].addr;  	spin_lock_irqsave(&qp_info->send_queue.lock, flags); @@ -2590,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,  						 sizeof *mad_priv -  						   sizeof mad_priv->header,  						 DMA_FROM_DEVICE); +		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, +						  sg_list.addr))) { +			ret = -ENOMEM; +			break; +		}  		mad_priv->header.mapping = sg_list.addr;  		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;  		mad_priv->header.mad_list.mad_queue = recv_queue; diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index da06abde9e0..23dd5a5c759 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -103,13 +103,13 @@ int ibnl_remove_client(int index)  EXPORT_SYMBOL(ibnl_remove_client);  void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, -		   int len, int client, int op) +		   int len, int client, int op, int flags)  {  	unsigned char *prev_tail;  	prev_tail = skb_tail_pointer(skb);  	*nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), -			 len, NLM_F_MULTI); +			 len, flags);  	if (!*nlh)  		goto out_nlmsg_trim;  	(*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; @@ -148,7 +148,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	list_for_each_entry(client, &client_list, list) {  		if (client->index == index) {  			if (op < 0 || op >= client->nops || -			    !client->cb_table[RDMA_NL_GET_OP(op)].dump) +			    !client->cb_table[op].dump)  				return -EINVAL;  			{ @@ -172,6 +172,20 @@ static void ibnl_rcv(struct sk_buff *skb)  	mutex_unlock(&ibnl_mutex);  } +int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, +			__u32 pid) +{ +	return nlmsg_unicast(nls, skb, pid); +} +EXPORT_SYMBOL(ibnl_unicast); + +int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, +			unsigned int group, gfp_t flags) +{ +	return nlmsg_multicast(nls, skb, 0, group, flags); +} +EXPORT_SYMBOL(ibnl_multicast); +  int __init ibnl_init(void)  {  	struct netlink_kernel_cfg cfg = { diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 9838ca48438..233eaf541f5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -42,7 +42,7 @@  #include <linux/kref.h>  #include <linux/idr.h>  #include <linux/workqueue.h> - +#include <uapi/linux/if_ether.h>  #include <rdma/ib_pack.h>  #include <rdma/ib_cache.h>  #include "sa.h" @@ -556,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,  		ah_attr->grh.hop_limit     = rec->hop_limit;  		ah_attr->grh.traffic_class = rec->traffic_class;  	} +	if (force_grh) { +		memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); +		ah_attr->vlan_id = 
rec->vlan_id; +	} else { +		ah_attr->vlan_id = 0xffff; +	} +  	return 0;  }  EXPORT_SYMBOL(ib_init_ah_from_path); @@ -611,7 +618,7 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)  static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)  { -	bool preload = gfp_mask & __GFP_WAIT; +	bool preload = !!(gfp_mask & __GFP_WAIT);  	unsigned long flags;  	int ret, id; @@ -670,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,  		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),  			  mad->data, &rec); +		rec.vlan_id = 0xffff; +		memset(rec.dmac, 0, ETH_ALEN); +		memset(rec.smac, 0, ETH_ALEN);  		query->callback(status, &rec, query->context);  	} else  		query->callback(status, NULL, query->context); diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index cde1e7b5b85..cbd0383f622 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -429,15 +429,19 @@ static void ib_port_release(struct kobject *kobj)  	struct attribute *a;  	int i; -	for (i = 0; (a = p->gid_group.attrs[i]); ++i) -		kfree(a); +	if (p->gid_group.attrs) { +		for (i = 0; (a = p->gid_group.attrs[i]); ++i) +			kfree(a); -	kfree(p->gid_group.attrs); +		kfree(p->gid_group.attrs); +	} -	for (i = 0; (a = p->pkey_group.attrs[i]); ++i) -		kfree(a); +	if (p->pkey_group.attrs) { +		for (i = 0; (a = p->pkey_group.attrs[i]); ++i) +			kfree(a); -	kfree(p->pkey_group.attrs); +		kfree(p->pkey_group.attrs); +	}  	kfree(p);  } @@ -534,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num,  	p->port_num   = port_num;  	ret = kobject_init_and_add(&p->kobj, &port_type, -				   kobject_get(device->ports_parent), +				   device->ports_parent,  				   "%d", port_num); -	if (ret) -		goto err_put; +	if (ret) { +		kfree(p); +		return ret; +	}  	ret = sysfs_create_group(&p->kobj, &pma_group);  	if (ret) @@ -585,6 +591,7 @@ err_free_pkey:  		kfree(p->pkey_group.attrs[i]);  	kfree(p->pkey_group.attrs); +	p->pkey_group.attrs = NULL;  err_remove_gid:  	sysfs_remove_group(&p->kobj, &p->gid_group); @@ -594,13 +601,13 @@ err_free_gid:  		kfree(p->gid_group.attrs[i]);  	kfree(p->gid_group.attrs); +	p->gid_group.attrs = NULL;  err_remove_pma:  	sysfs_remove_group(&p->kobj, &pma_group);  err_put: -	kobject_put(device->ports_parent); -	kfree(p); +	kobject_put(&p->kobj);  	return ret;  } @@ -612,6 +619,8 @@ static ssize_t show_node_type(struct device *device,  	switch (dev->node_type) {  	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);  	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type); +	case RDMA_NODE_USNIC:	  return sprintf(buf, "%d: usNIC\n", dev->node_type); +	case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);  	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);  	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);  	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type); @@ -807,6 +816,22 @@ static struct attribute_group iw_stats_group = {  	.attrs	= iw_proto_stats_attrs,  }; +static void free_port_list_attributes(struct ib_device *device) +{ +	struct kobject *p, *t; + +	list_for_each_entry_safe(p, t, &device->port_list, entry) { +		struct ib_port *port = container_of(p, struct ib_port, kobj); +		list_del(&p->entry); +		sysfs_remove_group(p, &pma_group); +		sysfs_remove_group(p, &port->pkey_group); +		sysfs_remove_group(p, &port->gid_group); +		kobject_put(p); +	} + +	
kobject_put(device->ports_parent); +} +  int ib_device_register_sysfs(struct ib_device *device,  			     int (*port_callback)(struct ib_device *,  						  u8, struct kobject *)) @@ -833,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device,  	}  	device->ports_parent = kobject_create_and_add("ports", -					kobject_get(&class_dev->kobj)); +						      &class_dev->kobj);  	if (!device->ports_parent) {  		ret = -ENOMEM;  		goto err_put; @@ -860,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device,  	return 0;  err_put: -	{ -		struct kobject *p, *t; -		struct ib_port *port; - -		list_for_each_entry_safe(p, t, &device->port_list, entry) { -			list_del(&p->entry); -			port = container_of(p, struct ib_port, kobj); -			sysfs_remove_group(p, &pma_group); -			sysfs_remove_group(p, &port->pkey_group); -			sysfs_remove_group(p, &port->gid_group); -			kobject_put(p); -		} -	} - -	kobject_put(&class_dev->kobj); +	free_port_list_attributes(device);  err_unregister:  	device_unregister(class_dev); @@ -885,22 +896,18 @@ err:  void ib_device_unregister_sysfs(struct ib_device *device)  { -	struct kobject *p, *t; -	struct ib_port *port; -  	/* Hold kobject until ib_dealloc_device() */ -	kobject_get(&device->dev.kobj); +	struct kobject *kobj_dev = kobject_get(&device->dev.kobj); +	int i; -	list_for_each_entry_safe(p, t, &device->port_list, entry) { -		list_del(&p->entry); -		port = container_of(p, struct ib_port, kobj); -		sysfs_remove_group(p, &pma_group); -		sysfs_remove_group(p, &port->pkey_group); -		sysfs_remove_group(p, &port->gid_group); -		kobject_put(p); -	} +	if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) +		sysfs_remove_group(kobj_dev, &iw_stats_group); + +	free_port_list_attributes(device); + +	for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) +		device_remove_file(&device->dev, ib_class_attributes[i]); -	kobject_put(device->ports_parent);  	device_unregister(&device->dev);  } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b0f189be543..56a4b7ca7ee 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -57,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL");  static unsigned int max_backlog = 1024;  static struct ctl_table_header *ucma_ctl_table_hdr; -static ctl_table ucma_ctl_table[] = { +static struct ctl_table ucma_ctl_table[] = {  	{  		.procname	= "max_backlog",  		.data		= &max_backlog, @@ -271,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,  			goto out;  		}  		ctx->backlog--; -	} else if (!ctx->uid) { +	} else if (!ctx->uid || ctx->cm_id != cm_id) {  		/*  		 * We ignore events for new connections until userspace has set  		 * their context.  
This can only happen if an error occurs on a @@ -655,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,  static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,  				 struct rdma_route *route)  { -	struct rdma_dev_addr *dev_addr; -	struct net_device *dev; -	u16 vid = 0;  	resp->num_paths = route->num_paths;  	switch (route->num_paths) {  	case 0: -		dev_addr = &route->addr.dev_addr; -		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); -			if (dev) { -				vid = rdma_vlan_dev_vlan_id(dev); -				dev_put(dev); -			} - -		iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, -				    dev_addr->dst_dev_addr, vid); -		iboe_addr_get_sgid(dev_addr, -				   (union ib_gid *) &resp->ib_route[0].sgid); +		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, +			    (union ib_gid *)&resp->ib_route[0].dgid); +		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, +			    (union ib_gid *)&resp->ib_route[0].sgid);  		resp->ib_route[0].pkey = cpu_to_be16(0xffff);  		break;  	case 2: diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a8411232207..a3a2e9c1639 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -42,29 +42,29 @@  #include "uverbs.h" -#define IB_UMEM_MAX_PAGE_CHUNK						\ -	((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /	\ -	 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -	\ -	  (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))  static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)  { -	struct ib_umem_chunk *chunk, *tmp; +	struct scatterlist *sg; +	struct page *page;  	int i; -	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { -		ib_dma_unmap_sg(dev, chunk->page_list, -				chunk->nents, DMA_BIDIRECTIONAL); -		for (i = 0; i < chunk->nents; ++i) { -			struct page *page = sg_page(&chunk->page_list[i]); +	if (umem->nmap > 0) +		ib_dma_unmap_sg(dev, umem->sg_head.sgl, +				umem->nmap, +				DMA_BIDIRECTIONAL); -			if (umem->writable && dirty) -				set_page_dirty_lock(page); -			put_page(page); -		} +	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { -		kfree(chunk); +		page = sg_page(sg); +		if (umem->writable && dirty) +			set_page_dirty_lock(page); +		put_page(page);  	} + +	sg_free_table(&umem->sg_head); +	return; +  }  /** @@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	struct ib_umem *umem;  	struct page **page_list;  	struct vm_area_struct **vma_list; -	struct ib_umem_chunk *chunk;  	unsigned long locked;  	unsigned long lock_limit;  	unsigned long cur_base;  	unsigned long npages;  	int ret; -	int off;  	int i;  	DEFINE_DMA_ATTRS(attrs); +	struct scatterlist *sg, *sg_list_start; +	int need_release = 0;  	if (dmasync)  		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	if (!can_do_mlock())  		return ERR_PTR(-EPERM); -	umem = kmalloc(sizeof *umem, GFP_KERNEL); +	umem = kzalloc(sizeof *umem, GFP_KERNEL);  	if (!umem)  		return ERR_PTR(-ENOMEM); @@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  	/* We assume the memory is from hugetlb until proved otherwise */  	umem->hugetlb   = 1; -	INIT_LIST_HEAD(&umem->chunk_list); -  	page_list = (struct page **) __get_free_page(GFP_KERNEL);  	if (!page_list) {  		kfree(umem); @@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long 
addr,  	cur_base = addr & PAGE_MASK; -	ret = 0; +	if (npages == 0) { +		ret = -EINVAL; +		goto out; +	} + +	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); +	if (ret) +		goto out; + +	need_release = 1; +	sg_list_start = umem->sg_head.sgl; +  	while (npages) {  		ret = get_user_pages(current, current->mm, cur_base,  				     min_t(unsigned long, npages, @@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,  		if (ret < 0)  			goto out; +		umem->npages += ret;  		cur_base += ret * PAGE_SIZE;  		npages   -= ret; -		off = 0; - -		while (ret) { -			chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * -					min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), -					GFP_KERNEL); -			if (!chunk) { -				ret = -ENOMEM; -				goto out; -			} - -			chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); -			sg_init_table(chunk->page_list, chunk->nents); -			for (i = 0; i < chunk->nents; ++i) { -				if (vma_list && -				    !is_vm_hugetlb_page(vma_list[i + off])) -					umem->hugetlb = 0; -				sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); -			} - -			chunk->nmap = ib_dma_map_sg_attrs(context->device, -							  &chunk->page_list[0], -							  chunk->nents, -							  DMA_BIDIRECTIONAL, -							  &attrs); -			if (chunk->nmap <= 0) { -				for (i = 0; i < chunk->nents; ++i) -					put_page(sg_page(&chunk->page_list[i])); -				kfree(chunk); - -				ret = -ENOMEM; -				goto out; -			} - -			ret -= chunk->nents; -			off += chunk->nents; -			list_add_tail(&chunk->list, &umem->chunk_list); +		for_each_sg(sg_list_start, sg, ret, i) { +			if (vma_list && !is_vm_hugetlb_page(vma_list[i])) +				umem->hugetlb = 0; + +			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);  		} -		ret = 0; +		/* preparing for next loop */ +		sg_list_start = sg;  	} +	umem->nmap = ib_dma_map_sg_attrs(context->device, +				  umem->sg_head.sgl, +				  umem->npages, +				  DMA_BIDIRECTIONAL, +				  &attrs); + +	if (umem->nmap <= 0) { +		ret = -ENOMEM; +		goto out; +	} + +	ret = 0; +  out:  	if (ret < 0) { -		__ib_umem_release(context->device, umem, 0); +		if (need_release) +			__ib_umem_release(context->device, umem, 0);  		kfree(umem);  	} else  		current->mm->pinned_vm = locked; @@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release);  int ib_umem_page_count(struct ib_umem *umem)  { -	struct ib_umem_chunk *chunk;  	int shift;  	int i;  	int n; +	struct scatterlist *sg;  	shift = ilog2(umem->page_size);  	n = 0; -	list_for_each_entry(chunk, &umem->chunk_list, list) -		for (i = 0; i < chunk->nmap; ++i) -			n += sg_dma_len(&chunk->page_list[i]) >> shift; +	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) +		n += sg_dma_len(sg) >> shift;  	return n;  } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f0d588f8859..1acb9910055 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -98,7 +98,7 @@ struct ib_umad_port {  struct ib_umad_device {  	int                  start_port, end_port; -	struct kref          ref; +	struct kobject       kobj;  	struct ib_umad_port  port[0];  }; @@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);  static void ib_umad_add_one(struct ib_device *device);  static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj)  {  	struct ib_umad_device *dev = -		container_of(ref, struct ib_umad_device, ref); +		container_of(kobj, struct ib_umad_device, kobj);  	
kfree(dev);  } +static struct kobj_type ib_umad_dev_ktype = { +	.release = ib_umad_release_dev, +}; +  static int hdr_size(struct ib_umad_file *file)  {  	return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : @@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp)  {  	struct ib_umad_port *port;  	struct ib_umad_file *file; -	int ret; +	int ret = -ENXIO;  	port = container_of(inode->i_cdev, struct ib_umad_port, cdev); -	if (port) -		kref_get(&port->umad_dev->ref); -	else -		return -ENXIO;  	mutex_lock(&port->file_mutex); -	if (!port->ib_dev) { -		ret = -ENXIO; +	if (!port->ib_dev)  		goto out; -	} +	ret = -ENOMEM;  	file = kzalloc(sizeof *file, GFP_KERNEL); -	if (!file) { -		kref_put(&port->umad_dev->ref, ib_umad_release_dev); -		ret = -ENOMEM; +	if (!file)  		goto out; -	}  	mutex_init(&file->mutex);  	spin_lock_init(&file->send_lock); @@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp)  	list_add_tail(&file->port_list, &port->file_list);  	ret = nonseekable_open(inode, filp); +	if (ret) { +		list_del(&file->port_list); +		kfree(file); +		goto out; +	} + +	kobject_get(&port->umad_dev->kobj);  out:  	mutex_unlock(&port->file_mutex); @@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)  	mutex_unlock(&file->port->file_mutex);  	kfree(file); -	kref_put(&dev->ref, ib_umad_release_dev); +	kobject_put(&dev->kobj);  	return 0;  } @@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)  	int ret;  	port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); -	if (port) -		kref_get(&port->umad_dev->ref); -	else -		return -ENXIO;  	if (filp->f_flags & O_NONBLOCK) {  		if (down_trylock(&port->sm_sem)) { @@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)  	}  	ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); -	if (ret) { -		up(&port->sm_sem); -		goto fail; -	} +	if (ret) +		goto err_up_sem;  	filp->private_data = port; -	return nonseekable_open(inode, filp); +	ret = nonseekable_open(inode, filp); +	if (ret) +		goto err_clr_sm_cap; + +	kobject_get(&port->umad_dev->kobj); + +	return 0; + +err_clr_sm_cap: +	swap(props.set_port_cap_mask, props.clr_port_cap_mask); +	ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: +	up(&port->sm_sem);  fail: -	kref_put(&port->umad_dev->ref, ib_umad_release_dev);  	return ret;  } @@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)  	up(&port->sm_sem); -	kref_put(&port->umad_dev->ref, ib_umad_release_dev); +	kobject_put(&port->umad_dev->kobj);  	return ret;  } @@ -995,6 +1004,7 @@ static int find_overflow_devnum(void)  }  static int ib_umad_init_port(struct ib_device *device, int port_num, +			     struct ib_umad_device *umad_dev,  			     struct ib_umad_port *port)  {  	int devnum; @@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,  	cdev_init(&port->cdev, &umad_fops);  	port->cdev.owner = THIS_MODULE; +	port->cdev.kobj.parent = &umad_dev->kobj;  	kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);  	if (cdev_add(&port->cdev, base, 1))  		goto err_cdev; @@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,  	base += IB_UMAD_MAX_PORTS;  	cdev_init(&port->sm_cdev, &umad_sm_fops);  	port->sm_cdev.owner = THIS_MODULE; +	port->sm_cdev.kobj.parent = &umad_dev->kobj;  	kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);  	if 
(cdev_add(&port->sm_cdev, base, 1))  		goto err_sm_cdev; @@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device)  	if (!umad_dev)  		return; -	kref_init(&umad_dev->ref); +	kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);  	umad_dev->start_port = s;  	umad_dev->end_port   = e; @@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device)  	for (i = s; i <= e; ++i) {  		umad_dev->port[i - s].umad_dev = umad_dev; -		if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) +		if (ib_umad_init_port(device, i, umad_dev, +				      &umad_dev->port[i - s]))  			goto err;  	} @@ -1158,7 +1171,7 @@ err:  	while (--i >= s)  		ib_umad_kill_port(&umad_dev->port[i - s]); -	kref_put(&umad_dev->ref, ib_umad_release_dev); +	kobject_put(&umad_dev->kobj);  }  static void ib_umad_remove_one(struct ib_device *device) @@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device)  	for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i)  		ib_umad_kill_port(&umad_dev->port[i]); -	kref_put(&umad_dev->ref, ib_umad_release_dev); +	kobject_put(&umad_dev->kobj);  }  static char *umad_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index d040b877475..a283274a5a0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -47,6 +47,22 @@  #include <rdma/ib_umem.h>  #include <rdma/ib_user_verbs.h> +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\ +	do {								\ +		(udata)->inbuf  = (const void __user *) (ibuf);		\ +		(udata)->outbuf = (void __user *) (obuf);		\ +		(udata)->inlen  = (ilen);				\ +		(udata)->outlen = (olen);				\ +	} while (0) + +#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen)			\ +	do {									\ +		(udata)->inbuf  = (ilen) ? (const void __user *) (ibuf) : NULL;	\ +		(udata)->outbuf = (olen) ? 
(void __user *) (obuf) : NULL;	\ +		(udata)->inlen  = (ilen);					\ +		(udata)->outlen = (olen);					\ +	} while (0) +  /*   * Our lifetime rules for these structs are the following:   * @@ -178,6 +194,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,  			     struct ib_event *event);  void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); +struct ib_uverbs_flow_spec { +	union { +		union { +			struct ib_uverbs_flow_spec_hdr hdr; +			struct { +				__u32 type; +				__u16 size; +				__u16 reserved; +			}; +		}; +		struct ib_uverbs_flow_spec_eth     eth; +		struct ib_uverbs_flow_spec_ipv4    ipv4; +		struct ib_uverbs_flow_spec_tcp_udp tcp_udp; +	}; +}; +  #define IB_UVERBS_DECLARE_CMD(name)					\  	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\  				 const char __user *buf, int in_len,	\ @@ -217,7 +249,13 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);  IB_UVERBS_DECLARE_CMD(create_xsrq);  IB_UVERBS_DECLARE_CMD(open_xrcd);  IB_UVERBS_DECLARE_CMD(close_xrcd); -IB_UVERBS_DECLARE_CMD(create_flow); -IB_UVERBS_DECLARE_CMD(destroy_flow); + +#define IB_UVERBS_DECLARE_EX_CMD(name)				\ +	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\ +				struct ib_udata *ucore,		\ +				struct ib_udata *uhw) + +IB_UVERBS_DECLARE_EX_CMD(create_flow); +IB_UVERBS_DECLARE_EX_CMD(destroy_flow);  #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f2b81b9ee0d..ea6203ee7bc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -40,6 +40,7 @@  #include <asm/uaccess.h>  #include "uverbs.h" +#include "core_priv.h"  struct uverbs_lock_class {  	struct lock_class_key	key; @@ -56,14 +57,6 @@ static struct uverbs_lock_class srq_lock_class	= { .name = "SRQ-uobj" };  static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };  static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; -#define INIT_UDATA(udata, ibuf, obuf, ilen, olen)			\ -	do {								\ -		(udata)->inbuf  = (void __user *) (ibuf);		\ -		(udata)->outbuf = (void __user *) (obuf);		\ -		(udata)->inlen  = (ilen);				\ -		(udata)->outlen = (olen);				\ -	} while (0) -  /*   * The ib_uobject locking scheme is as follows:   * @@ -937,13 +930,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,  	if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))  		return -EINVAL; -	/* -	 * Local write permission is required if remote write or -	 * remote atomic permission is also requested. 
-	 */ -	if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && -	    !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) -		return -EINVAL; +	ret = ib_check_mr_access(cmd.access_flags); +	if (ret) +		return ret;  	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);  	if (!uobj) @@ -1973,6 +1962,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,  	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;  	if (qp->real_qp == qp) { +		ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); +		if (ret) +			goto out;  		ret = qp->device->modify_qp(qp, attr,  			modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);  	} else { @@ -2126,6 +2118,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,  			}  			next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;  			next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; +			if (next->opcode == IB_WR_SEND_WITH_IMM) +				next->ex.imm_data = +					(__be32 __force) user_wr->ex.imm_data;  		} else {  			switch (next->opcode) {  			case IB_WR_RDMA_WRITE_WITH_IMM: @@ -2599,9 +2594,12 @@ out_put:  	return ret ? ret : in_len;  } -static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec, +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,  				union ib_flow_spec *ib_spec)  { +	if (kern_spec->reserved) +		return -EINVAL; +  	ib_spec->type = kern_spec->type;  	switch (ib_spec->type) { @@ -2639,28 +2637,34 @@ static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,  	return 0;  } -ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file, -			      const char __user *buf, int in_len, -			      int out_len) +int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, +			     struct ib_udata *ucore, +			     struct ib_udata *uhw)  {  	struct ib_uverbs_create_flow	  cmd;  	struct ib_uverbs_create_flow_resp resp;  	struct ib_uobject		  *uobj;  	struct ib_flow			  *flow_id; -	struct ib_kern_flow_attr	  *kern_flow_attr; +	struct ib_uverbs_flow_attr	  *kern_flow_attr;  	struct ib_flow_attr		  *flow_attr;  	struct ib_qp			  *qp;  	int err = 0;  	void *kern_spec;  	void *ib_spec;  	int i; -	int kern_attr_size; -	if (out_len < sizeof(resp)) +	if (ucore->inlen < sizeof(cmd)) +		return -EINVAL; + +	if (ucore->outlen < sizeof(resp))  		return -ENOSPC; -	if (copy_from_user(&cmd, buf, sizeof(cmd))) -		return -EFAULT; +	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); +	if (err) +		return err; + +	ucore->inbuf += sizeof(cmd); +	ucore->inlen -= sizeof(cmd);  	if (cmd.comp_mask)  		return -EINVAL; @@ -2669,32 +2673,31 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,  	     !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))  		return -EPERM; -	if (cmd.flow_attr.num_of_specs < 0 || -	    cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) +	if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)  		return -EINVAL; -	kern_attr_size = cmd.flow_attr.size - sizeof(cmd) - -			 sizeof(struct ib_uverbs_cmd_hdr_ex); +	if (cmd.flow_attr.size > ucore->inlen || +	    cmd.flow_attr.size > +	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) +		return -EINVAL; -	if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len || -	    kern_attr_size < 0 || kern_attr_size > -	    (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec))) +	if (cmd.flow_attr.reserved[0] || +	    cmd.flow_attr.reserved[1])  		return -EINVAL;  	if (cmd.flow_attr.num_of_specs) { -		kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); +		kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, +					 
GFP_KERNEL);  		if (!kern_flow_attr)  			return -ENOMEM;  		memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); -		if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd), -				   kern_attr_size)) { -			err = -EFAULT; +		err = ib_copy_from_udata(kern_flow_attr + 1, ucore, +					 cmd.flow_attr.size); +		if (err)  			goto err_free_attr; -		}  	} else {  		kern_flow_attr = &cmd.flow_attr; -		kern_attr_size = sizeof(cmd.flow_attr);  	}  	uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); @@ -2711,7 +2714,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,  		goto err_uobj;  	} -	flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL); +	flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL);  	if (!flow_attr) {  		err = -ENOMEM;  		goto err_put; @@ -2726,19 +2729,23 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,  	kern_spec = kern_flow_attr + 1;  	ib_spec = flow_attr + 1; -	for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) { +	for (i = 0; i < flow_attr->num_of_specs && +	     cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && +	     cmd.flow_attr.size >= +	     ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {  		err = kern_spec_to_ib_spec(kern_spec, ib_spec);  		if (err)  			goto err_free;  		flow_attr->size +=  			((union ib_flow_spec *) ib_spec)->size; -		kern_attr_size -= ((struct ib_kern_spec *) kern_spec)->size; -		kern_spec += ((struct ib_kern_spec *) kern_spec)->size; +		cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; +		kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size;  		ib_spec += ((union ib_flow_spec *) ib_spec)->size;  	} -	if (kern_attr_size) { -		pr_warn("create flow failed, %d bytes left from uverb cmd\n", -			kern_attr_size); +	if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { +		pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", +			i, cmd.flow_attr.size); +		err = -EINVAL;  		goto err_free;  	}  	flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); @@ -2757,11 +2764,10 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,  	memset(&resp, 0, sizeof(resp));  	resp.flow_handle = uobj->id; -	if (copy_to_user((void __user *)(unsigned long) cmd.response, -			 &resp, sizeof(resp))) { -		err = -EFAULT; +	err = ib_copy_to_udata(ucore, +			       &resp, sizeof(resp)); +	if (err)  		goto err_copy; -	}  	put_qp_read(qp);  	mutex_lock(&file->mutex); @@ -2774,7 +2780,7 @@ ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,  	kfree(flow_attr);  	if (cmd.flow_attr.num_of_specs)  		kfree(kern_flow_attr); -	return in_len; +	return 0;  err_copy:  	idr_remove_uobj(&ib_uverbs_rule_idr, uobj);  destroy_flow: @@ -2791,16 +2797,24 @@ err_free_attr:  	return err;  } -ssize_t ib_uverbs_destroy_flow(struct ib_uverbs_file *file, -			       const char __user *buf, int in_len, -			       int out_len) { +int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, +			      struct ib_udata *ucore, +			      struct ib_udata *uhw) +{  	struct ib_uverbs_destroy_flow	cmd;  	struct ib_flow			*flow_id;  	struct ib_uobject		*uobj;  	int				ret; -	if (copy_from_user(&cmd, buf, sizeof(cmd))) -		return -EFAULT; +	if (ucore->inlen < sizeof(cmd)) +		return -EINVAL; + +	ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); +	if (ret) +		return ret; + +	if (cmd.comp_mask) +		return -EINVAL;  	uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle,  			      file->ucontext); @@ -2822,7 +2836,7 @@ ssize_t ib_uverbs_destroy_flow(struct 
ib_uverbs_file *file,  	put_uobj(uobj); -	return ret ? ret : in_len; +	return ret;  }  static int __uverbs_create_xsrq(struct ib_uverbs_file *file, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 75ad86c4abf..08219fb3338 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -115,8 +115,13 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,  	[IB_USER_VERBS_CMD_CLOSE_XRCD]		= ib_uverbs_close_xrcd,  	[IB_USER_VERBS_CMD_CREATE_XSRQ]		= ib_uverbs_create_xsrq,  	[IB_USER_VERBS_CMD_OPEN_QP]		= ib_uverbs_open_qp, -	[IB_USER_VERBS_CMD_CREATE_FLOW]		= ib_uverbs_create_flow, -	[IB_USER_VERBS_CMD_DESTROY_FLOW]	= ib_uverbs_destroy_flow +}; + +static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, +				    struct ib_udata *ucore, +				    struct ib_udata *uhw) = { +	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow, +	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow  };  static void ib_uverbs_add_one(struct ib_device *device); @@ -587,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,  {  	struct ib_uverbs_file *file = filp->private_data;  	struct ib_uverbs_cmd_hdr hdr; +	__u32 flags;  	if (count < sizeof hdr)  		return -EINVAL; @@ -594,41 +600,110 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,  	if (copy_from_user(&hdr, buf, sizeof hdr))  		return -EFAULT; -	if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || -	    !uverbs_cmd_table[hdr.command]) -		return -EINVAL; +	flags = (hdr.command & +		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; -	if (!file->ucontext && -	    hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) -		return -EINVAL; +	if (!flags) { +		__u32 command; -	if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) -		return -ENOSYS; +		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | +					   IB_USER_VERBS_CMD_COMMAND_MASK)) +			return -EINVAL; -	if (hdr.command >= IB_USER_VERBS_CMD_THRESHOLD) { -		struct ib_uverbs_cmd_hdr_ex hdr_ex; +		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; -		if (copy_from_user(&hdr_ex, buf, sizeof(hdr_ex))) -			return -EFAULT; +		if (command >= ARRAY_SIZE(uverbs_cmd_table) || +		    !uverbs_cmd_table[command]) +			return -EINVAL; -		if (((hdr_ex.in_words + hdr_ex.provider_in_words) * 4) != count) +		if (!file->ucontext && +		    command != IB_USER_VERBS_CMD_GET_CONTEXT)  			return -EINVAL; -		return uverbs_cmd_table[hdr.command](file, -						     buf + sizeof(hdr_ex), -						     (hdr_ex.in_words + -						      hdr_ex.provider_in_words) * 4, -						     (hdr_ex.out_words + -						      hdr_ex.provider_out_words) * 4); -	} else { +		if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command))) +			return -ENOSYS; +  		if (hdr.in_words * 4 != count)  			return -EINVAL; -		return uverbs_cmd_table[hdr.command](file, -						     buf + sizeof(hdr), -						     hdr.in_words * 4, -						     hdr.out_words * 4); +		return uverbs_cmd_table[command](file, +						 buf + sizeof(hdr), +						 hdr.in_words * 4, +						 hdr.out_words * 4); + +	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { +		__u32 command; + +		struct ib_uverbs_ex_cmd_hdr ex_hdr; +		struct ib_udata ucore; +		struct ib_udata uhw; +		int err; +		size_t written_count = count; + +		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | +					   IB_USER_VERBS_CMD_COMMAND_MASK)) +			return -EINVAL; + +		command = hdr.command & 
IB_USER_VERBS_CMD_COMMAND_MASK; + +		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || +		    !uverbs_ex_cmd_table[command]) +			return -ENOSYS; + +		if (!file->ucontext) +			return -EINVAL; + +		if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command))) +			return -ENOSYS; + +		if (count < (sizeof(hdr) + sizeof(ex_hdr))) +			return -EINVAL; + +		if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) +			return -EFAULT; + +		count -= sizeof(hdr) + sizeof(ex_hdr); +		buf += sizeof(hdr) + sizeof(ex_hdr); + +		if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) +			return -EINVAL; + +		if (ex_hdr.cmd_hdr_reserved) +			return -EINVAL; + +		if (ex_hdr.response) { +			if (!hdr.out_words && !ex_hdr.provider_out_words) +				return -EINVAL; + +			if (!access_ok(VERIFY_WRITE, +				       (void __user *) (unsigned long) ex_hdr.response, +				       (hdr.out_words + ex_hdr.provider_out_words) * 8)) +				return -EFAULT; +		} else { +			if (hdr.out_words || ex_hdr.provider_out_words) +				return -EINVAL; +		} + +		INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, +				       hdr.in_words * 8, hdr.out_words * 8); + +		INIT_UDATA_BUF_OR_NULL(&uhw, +				       buf + ucore.inlen, +				       (unsigned long) ex_hdr.response + ucore.outlen, +				       ex_hdr.provider_in_words * 8, +				       ex_hdr.provider_out_words * 8); + +		err = uverbs_ex_cmd_table[command](file, +						   &ucore, +						   &uhw); + +		if (err) +			return err; + +		return written_count;  	} + +	return -ENOSYS;  }  static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index a321df28bab..c2b89cc5dbc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -44,8 +44,11 @@  #include <rdma/ib_verbs.h>  #include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> -int ib_rate_to_mult(enum ib_rate rate) +#include "core_priv.h" + +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate)  {  	switch (rate) {  	case IB_RATE_2_5_GBPS: return  1; @@ -62,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate)  }  EXPORT_SYMBOL(ib_rate_to_mult); -enum ib_rate mult_to_ib_rate(int mult) +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult)  {  	switch (mult) {  	case 1:  return IB_RATE_2_5_GBPS; @@ -79,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult)  }  EXPORT_SYMBOL(mult_to_ib_rate); -int ib_rate_to_mbps(enum ib_rate rate) +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)  {  	switch (rate) {  	case IB_RATE_2_5_GBPS: return 2500; @@ -104,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate)  }  EXPORT_SYMBOL(ib_rate_to_mbps); -enum rdma_transport_type +__attribute_const__ enum rdma_transport_type  rdma_node_get_transport(enum rdma_node_type node_type)  {  	switch (node_type) { @@ -114,6 +117,10 @@ rdma_node_get_transport(enum rdma_node_type node_type)  		return RDMA_TRANSPORT_IB;  	case RDMA_NODE_RNIC:  		return RDMA_TRANSPORT_IWARP; +	case RDMA_NODE_USNIC: +		return RDMA_TRANSPORT_USNIC; +	case RDMA_NODE_USNIC_UDP: +		return RDMA_TRANSPORT_USNIC_UDP;  	default:  		BUG();  		return 0; @@ -130,6 +137,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_  	case RDMA_TRANSPORT_IB:  		return IB_LINK_LAYER_INFINIBAND;  	case RDMA_TRANSPORT_IWARP: +	case RDMA_TRANSPORT_USNIC: +	case RDMA_TRANSPORT_USNIC_UDP:  		return IB_LINK_LAYER_ETHERNET;  	default:  		return IB_LINK_LAYER_UNSPECIFIED; @@ -189,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device 
*device, u8 port_num, struct ib_wc *wc,  	u32 flow_class;  	u16 gid_index;  	int ret; +	int is_eth = (rdma_port_get_link_layer(device, port_num) == +			IB_LINK_LAYER_ETHERNET);  	memset(ah_attr, 0, sizeof *ah_attr); +	if (is_eth) { +		if (!(wc->wc_flags & IB_WC_GRH)) +			return -EPROTOTYPE; + +		if (wc->wc_flags & IB_WC_WITH_SMAC && +		    wc->wc_flags & IB_WC_WITH_VLAN) { +			memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); +			ah_attr->vlan_id = wc->vlan_id; +		} else { +			ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, +					ah_attr->dmac, &ah_attr->vlan_id); +			if (ret) +				return ret; +		} +	} else { +		ah_attr->vlan_id = 0xffff; +	} +  	ah_attr->dlid = wc->slid;  	ah_attr->sl = wc->sl;  	ah_attr->src_path_bits = wc->dlid_path_bits; @@ -473,7 +502,9 @@ EXPORT_SYMBOL(ib_create_qp);  static const struct {  	int			valid;  	enum ib_qp_attr_mask	req_param[IB_QPT_MAX]; +	enum ib_qp_attr_mask	req_param_add_eth[IB_QPT_MAX];  	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX]; +	enum ib_qp_attr_mask	opt_param_add_eth[IB_QPT_MAX];  } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {  	[IB_QPS_RESET] = {  		[IB_QPS_RESET] = { .valid = 1 }, @@ -554,6 +585,12 @@ static const struct {  						IB_QP_MAX_DEST_RD_ATOMIC	|  						IB_QP_MIN_RNR_TIMER),  			}, +			.req_param_add_eth = { +				[IB_QPT_RC]  = (IB_QP_SMAC), +				[IB_QPT_UC]  = (IB_QP_SMAC), +				[IB_QPT_XRC_INI]  = (IB_QP_SMAC), +				[IB_QPT_XRC_TGT]  = (IB_QP_SMAC) +			},  			.opt_param = {  				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|  						 IB_QP_QKEY), @@ -573,7 +610,21 @@ static const struct {  						 IB_QP_QKEY),  				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|  						 IB_QP_QKEY), -			 } +			 }, +			.opt_param_add_eth = { +				[IB_QPT_RC]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_UC]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_XRC_INI]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID), +				[IB_QPT_XRC_TGT]  = (IB_QP_ALT_SMAC			| +						IB_QP_VID			| +						IB_QP_ALT_VID) +			}  		}  	},  	[IB_QPS_RTR]   = { @@ -776,7 +827,8 @@ static const struct {  };  int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, -		       enum ib_qp_type type, enum ib_qp_attr_mask mask) +		       enum ib_qp_type type, enum ib_qp_attr_mask mask, +		       enum rdma_link_layer ll)  {  	enum ib_qp_attr_mask req_param, opt_param; @@ -795,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,  	req_param = qp_state_table[cur_state][next_state].req_param[type];  	opt_param = qp_state_table[cur_state][next_state].opt_param[type]; +	if (ll == IB_LINK_LAYER_ETHERNET) { +		req_param |= qp_state_table[cur_state][next_state]. +			req_param_add_eth[type]; +		opt_param |= qp_state_table[cur_state][next_state]. 
+			opt_param_add_eth[type]; +	} +  	if ((mask & req_param) != req_param)  		return 0; @@ -805,10 +864,51 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,  }  EXPORT_SYMBOL(ib_modify_qp_is_ok); +int ib_resolve_eth_l2_attrs(struct ib_qp *qp, +			    struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ +	int           ret = 0; +	union ib_gid  sgid; + +	if ((*qp_attr_mask & IB_QP_AV)  && +	    (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { +		ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, +				   qp_attr->ah_attr.grh.sgid_index, &sgid); +		if (ret) +			goto out; +		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { +			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); +			rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); +			qp_attr->vlan_id = rdma_get_vlan_id(&sgid); +		} else { +			ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, +					qp_attr->ah_attr.dmac, &qp_attr->vlan_id); +			if (ret) +				goto out; +			ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); +			if (ret) +				goto out; +		} +		*qp_attr_mask |= IB_QP_SMAC; +		if (qp_attr->vlan_id < 0xFFFF) +			*qp_attr_mask |= IB_QP_VID; +	} +out: +	return ret; +} +EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); + +  int ib_modify_qp(struct ib_qp *qp,  		 struct ib_qp_attr *qp_attr,  		 int qp_attr_mask)  { +	int ret; + +	ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); +	if (ret) +		return ret; +  	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);  }  EXPORT_SYMBOL(ib_modify_qp); @@ -958,6 +1058,11 @@ EXPORT_SYMBOL(ib_resize_cq);  struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)  {  	struct ib_mr *mr; +	int err; + +	err = ib_check_mr_access(mr_access_flags); +	if (err) +		return ERR_PTR(err);  	mr = pd->device->get_dma_mr(pd, mr_access_flags); @@ -980,6 +1085,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd,  			     u64 *iova_start)  {  	struct ib_mr *mr; +	int err; + +	err = ib_check_mr_access(mr_access_flags); +	if (err) +		return ERR_PTR(err);  	if (!pd->device->reg_phys_mr)  		return ERR_PTR(-ENOSYS); @@ -1010,6 +1120,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr,  	struct ib_pd *old_pd;  	int ret; +	ret = ib_check_mr_access(mr_access_flags); +	if (ret) +		return ret; +  	if (!mr->device->rereg_phys_mr)  		return -ENOSYS; @@ -1055,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr)  }  EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, +			   struct ib_mr_init_attr *mr_init_attr) +{ +	struct ib_mr *mr; + +	if (!pd->device->create_mr) +		return ERR_PTR(-ENOSYS); + +	mr = pd->device->create_mr(pd, mr_init_attr); + +	if (!IS_ERR(mr)) { +		mr->device  = pd->device; +		mr->pd      = pd; +		mr->uobject = NULL; +		atomic_inc(&pd->usecnt); +		atomic_set(&mr->usecnt, 0); +	} + +	return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ +	struct ib_pd *pd; +	int ret; + +	if (atomic_read(&mr->usecnt)) +		return -EBUSY; + +	pd = mr->pd; +	ret = mr->device->destroy_mr(mr); +	if (!ret) +		atomic_dec(&pd->usecnt); + +	return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); +  struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)  {  	struct ib_mr *mr; @@ -1284,3 +1437,11 @@ int ib_destroy_flow(struct ib_flow *flow_id)  	return err;  }  EXPORT_SYMBOL(ib_destroy_flow); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, +		       struct 
ib_mr_status *mr_status)
+{
+	return mr->device->check_mr_status ?
+		mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
+}
+EXPORT_SYMBOL(ib_check_mr_status);
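
Editor's note (illustrative only, not part of the commit): the iwpm_nlmsg_request helpers added in iwpm_util.c/iwpm_util.h above implement a simple blocking request/response handshake with the user-space port mapper. A minimal usage sketch follows, assuming the port mapper echoes nlmsg_seq back in its reply; example_query(), example_complete() and their err_code parameter are hypothetical names, not functions from this patch:

#include "iwpm_util.h"

/* Caller side: allocate a tracked request, send it, block for the reply. */
static int example_query(u8 nl_client)
{
	struct iwpm_nlmsg_request *req;

	req = iwpm_get_nlmsg_request(iwpm_get_nlmsg_seq(), nl_client,
				     GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	/* ... build the message with iwpm_create_nlmsg()/ibnl_put_attr()
	 * and send it with ibnl_unicast() or ibnl_multicast() here ... */

	/* Sleeps until the reply path below marks the request done, or
	 * IWPM_NL_TIMEOUT expires; drops this path's kref reference and
	 * returns 0, the recorded err_code, or -EINVAL on timeout. */
	return iwpm_wait_complete_req(req);
}

/* Reply side, e.g. from a netlink callback, keyed by the echoed sequence. */
static void example_complete(__u32 echo_seq, u16 err_code)
{
	struct iwpm_nlmsg_request *req = iwpm_find_nlmsg_request(echo_seq);

	if (!req)
		return;		/* no in-process request with this seq */
	req->err_code = err_code;
	req->request_done = 1;
	wake_up(&req->waitq);
	kref_put(&req->kref, iwpm_free_nlmsg_request);	/* find's reference */
}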

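Editor's note (illustrative only, not part of the commit): the reworked ib_uverbs_write() above accepts two framings. Legacy commands keep 4-byte words and a single ib_uverbs_cmd_hdr; extended commands set IB_USER_VERBS_CMD_FLAG_EXTENDED in hdr.command, append an ib_uverbs_ex_cmd_hdr, and count payload words in 8-byte units. A sketch of the buffer a hypothetical user-space caller would write for an extended command (layout only, inferred from the checks in the patch; not a library API):

	struct ib_uverbs_cmd_hdr	hdr;	/* command | flags; in/out word counts */
	struct ib_uverbs_ex_cmd_hdr	ex_hdr;	/* response pointer, provider word
						 * counts; cmd_hdr_reserved must be 0 */
	/* followed by hdr.in_words * 8 bytes of core payload (ucore.inbuf),
	 * then ex_hdr.provider_in_words * 8 bytes of driver payload (uhw.inbuf).
	 * Replies land at ex_hdr.response: hdr.out_words * 8 core bytes, then
	 * ex_hdr.provider_out_words * 8 driver-specific bytes. */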