diff options
Diffstat (limited to 'drivers/infiniband/core')
25 files changed, 4132 insertions, 1018 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index c8bbaef1bec..ffd0af6734a 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -1,8 +1,9 @@ -infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := ib_addr.o rdma_cm.o +infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_cm.o user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o \ - ib_cm.o iw_cm.o $(infiniband-y) + ib_cm.o iw_cm.o ib_addr.o \ + $(infiniband-y) obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ $(user_access-y) @@ -17,7 +18,7 @@ ib_sa-y := sa_query.o multicast.o ib_cm-y := cm.o -iw_cm-y := iwcm.o +iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o rdma_cm-y := cma.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1612cfd50f3..8172d37f9ad 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -45,6 +45,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("IB Address Translation"); @@ -70,6 +71,23 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +int rdma_addr_size(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_IB: + return sizeof(struct sockaddr_ib); + default: + return 0; + } +} +EXPORT_SYMBOL(rdma_addr_size); + +static struct rdma_addr_client self; + void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); @@ -103,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, } EXPORT_SYMBOL(rdma_copy_addr); -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, + u16 *vlan_id) { struct net_device *dev; int ret = -EADDRNOTAVAIL; @@ -126,10 +145,12 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) return ret; ret = rdma_copy_addr(dev_addr, dev, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); dev_put(dev); break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) case AF_INET6: rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { @@ -137,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) &((struct sockaddr_in6 *) addr)->sin6_addr, dev, 1)) { ret = rdma_copy_addr(dev_addr, dev, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); break; } } @@ -152,13 +175,11 @@ static void set_timeout(unsigned long time) { unsigned long delay; - cancel_delayed_work(&work); - delay = time - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) @@ -178,22 +199,26 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *addr) +static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr) { struct neighbour *n; int ret; + n = dst_neigh_lookup(dst, daddr); + rcu_read_lock(); - n = dst_get_neighbour_noref(dst); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); ret = -ENODATA; } else { - ret = rdma_copy_addr(addr, dst->dev, n->ha); + ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); } rcu_read_unlock(); + if (n) + neigh_release(n); + return ret; } @@ -220,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, src_in->sin_addr.s_addr = fl4.saddr; if (rt->dst.dev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; @@ -232,14 +257,14 @@ static int addr4_resolve(struct sockaddr_in *src_in, goto put; } - ret = dst_fetch_ha(&rt->dst, addr); + ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); put: ip_rt_put(rt); out: return ret; } -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) static int addr6_resolve(struct sockaddr_in6 *src_in, struct sockaddr_in6 *dst_in, struct rdma_dev_addr *addr) @@ -268,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, } if (dst->dev->flags & IFF_LOOPBACK) { - ret = rdma_translate_ip((struct sockaddr *) dst_in, addr); + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); if (!ret) memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); goto put; @@ -280,7 +305,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } - ret = dst_fetch_ha(dst, addr); + ret = dst_fetch_ha(dst, addr, &fl6.daddr); put: dst_release(dst); return ret; @@ -367,12 +392,12 @@ int rdma_resolve_ip(struct rdma_addr_client *client, goto err; } - memcpy(src_in, src_addr, ip_addr_size(src_addr)); + memcpy(src_in, src_addr, rdma_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } - memcpy(dst_in, dst_addr, ip_addr_size(dst_addr)); + memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; @@ -419,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); +struct resolve_cb_context { + struct rdma_dev_addr *addr; + struct completion comp; +}; + +static void resolve_cb(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context) +{ + memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct + rdma_dev_addr)); + complete(&((struct resolve_cb_context *)context)->comp); +} + +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, + u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + struct resolve_cb_context ctx; + struct net_device *dev; + + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; + + + ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); + if (ret) + return ret; + + ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); + if (ret) + return ret; + + memset(&dev_addr, 0, sizeof(dev_addr)); + + ctx.addr = &dev_addr; + init_completion(&ctx.comp); + ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, + &dev_addr, 1000, resolve_cb, &ctx); + if (ret) + return ret; + + wait_for_completion(&ctx.comp); + + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); + dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); + if (!dev) + return -ENODEV; + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); + +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } gid_addr; + + ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + + if (ret) + return ret; + memset(&dev_addr, 0, sizeof(dev_addr)); + ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); + if (ret) + return ret; + + memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); + static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { @@ -443,11 +550,13 @@ static int __init addr_init(void) return -ENOMEM; register_netevent_notifier(&nb); + rdma_addr_register_client(&self); return 0; } static void __exit addr_cleanup(void) { + rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 9353992f9ee..80f6cf2449f 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -167,6 +167,7 @@ int ib_find_cached_pkey(struct ib_device *device, unsigned long flags; int i; int ret = -ENOENT; + int partial_ix = -1; if (port_num < start_port(device) || port_num > end_port(device)) return -EINVAL; @@ -179,6 +180,46 @@ int ib_find_cached_pkey(struct ib_device *device, for (i = 0; i < cache->table_len; ++i) if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { + if (cache->table[i] & 0x8000) { + *index = i; + ret = 0; + break; + } else + partial_ix = i; + } + + if (ret && partial_ix >= 0) { + *index = partial_ix; + ret = 0; + } + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_find_cached_pkey); + +int ib_find_exact_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int i; + int ret = -ENOENT; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + *index = -1; + + for (i = 0; i < cache->table_len; ++i) + if (cache->table[i] == pkey) { *index = i; ret = 0; break; @@ -188,7 +229,7 @@ int ib_find_cached_pkey(struct ib_device *device, return ret; } -EXPORT_SYMBOL(ib_find_cached_pkey); +EXPORT_SYMBOL(ib_find_exact_cached_pkey); int ib_get_cached_lmc(struct ib_device *device, u8 port_num, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c889aaef341..c3239170d8b 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -47,6 +47,7 @@ #include <linux/sysfs.h> #include <linux/workqueue.h> #include <linux/kdev_t.h> +#include <linux/etherdevice.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> @@ -177,6 +178,8 @@ struct cm_av { struct ib_ah_attr ah_attr; u16 pkey_index; u8 timeout; + u8 valid; + u8 smac[ETH_ALEN]; }; struct cm_work { @@ -376,26 +379,27 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av) ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path, &av->ah_attr); av->timeout = path->packet_life_time + 1; + memcpy(av->smac, path->smac, sizeof(av->smac)); + + av->valid = 1; return 0; } static int cm_alloc_id(struct cm_id_private *cm_id_priv) { unsigned long flags; - int ret, id; - static int next_id; + int id; - do { - spin_lock_irqsave(&cm.lock, flags); - ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, - next_id, &id); - if (!ret) - next_id = ((unsigned) id + 1) & MAX_ID_MASK; - spin_unlock_irqrestore(&cm.lock, flags); - } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); + idr_preload(GFP_KERNEL); + spin_lock_irqsave(&cm.lock, flags); + + id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT); + + spin_unlock_irqrestore(&cm.lock, flags); + idr_preload_end(); cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand; - return ret; + return id < 0 ? id : 0; } static void cm_free_id(__be32 local_id) @@ -1556,6 +1560,9 @@ static int cm_req_handler(struct cm_work *work) cm_process_routed_req(req_msg, work->mad_recv_wc->wc); cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]); + + memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN); + work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id; ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av); if (ret) { ib_get_cached_gid(work->port->cm_dev->ib_device, @@ -3502,6 +3509,32 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if (!cm_id_priv->av.valid) { + spin_unlock_irqrestore(&cm_id_priv->lock, flags); + return -EINVAL; + } + if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) { + qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id; + *qp_attr_mask |= IB_QP_VID; + } + if (!is_zero_ether_addr(cm_id_priv->av.smac)) { + memcpy(qp_attr->smac, cm_id_priv->av.smac, + sizeof(qp_attr->smac)); + *qp_attr_mask |= IB_QP_SMAC; + } + if (cm_id_priv->alt_av.valid) { + if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) { + qp_attr->alt_vlan_id = + cm_id_priv->alt_av.ah_attr.vlan_id; + *qp_attr_mask |= IB_QP_ALT_VID; + } + if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) { + memcpy(qp_attr->alt_smac, + cm_id_priv->alt_av.smac, + sizeof(qp_attr->alt_smac)); + *qp_attr_mask |= IB_QP_ALT_SMAC; + } + } qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); @@ -3844,28 +3877,31 @@ static int __init ib_cm_init(void) cm.remote_sidr_table = RB_ROOT; idr_init(&cm.local_id_table); get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); - idr_pre_get(&cm.local_id_table, GFP_KERNEL); INIT_LIST_HEAD(&cm.timewait_list); ret = class_register(&cm_class); - if (ret) - return -ENOMEM; + if (ret) { + ret = -ENOMEM; + goto error1; + } cm.wq = create_workqueue("ib_cm"); if (!cm.wq) { ret = -ENOMEM; - goto error1; + goto error2; } ret = ib_register_client(&cm_client); if (ret) - goto error2; + goto error3; return 0; -error2: +error3: destroy_workqueue(cm.wq); -error1: +error2: class_unregister(&cm_class); +error1: + idr_destroy(&cm.local_id_table); return ret; } diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 7da9b210234..be068f47e47 100644 --- a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -44,18 +44,6 @@ #define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ -#define CM_REQ_ATTR_ID cpu_to_be16(0x0010) -#define CM_MRA_ATTR_ID cpu_to_be16(0x0011) -#define CM_REJ_ATTR_ID cpu_to_be16(0x0012) -#define CM_REP_ATTR_ID cpu_to_be16(0x0013) -#define CM_RTU_ATTR_ID cpu_to_be16(0x0014) -#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015) -#define CM_DREP_ATTR_ID cpu_to_be16(0x0016) -#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017) -#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018) -#define CM_LAP_ATTR_ID cpu_to_be16(0x0019) -#define CM_APR_ATTR_ID cpu_to_be16(0x001A) - enum cm_msg_sequence { CM_MSG_SEQUENCE_REQ, CM_MSG_SEQUENCE_LAP, diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e3e470fecaa..d570030d899 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -42,6 +42,7 @@ #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/module.h> +#include <net/route.h> #include <net/tcp.h> #include <net/ipv6.h> @@ -49,6 +50,7 @@ #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> #include <rdma/rdma_netlink.h> +#include <rdma/ib.h> #include <rdma/ib_cache.h> #include <rdma/ib_cm.h> #include <rdma/ib_sa.h> @@ -78,7 +80,6 @@ static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); static struct workqueue_struct *cma_wq; -static DEFINE_IDR(sdp_ps); static DEFINE_IDR(tcp_ps); static DEFINE_IDR(udp_ps); static DEFINE_IDR(ipoib_ps); @@ -98,6 +99,10 @@ struct rdma_bind_list { unsigned short port; }; +enum { + CMA_OPTION_AFONLY, +}; + /* * Device removal can occur at anytime, so we need extra handling to * serialize notifying the user of device removal with other callbacks. @@ -136,9 +141,11 @@ struct rdma_id_private { u32 qkey; u32 qp_num; pid_t owner; + u32 options; u8 srq; u8 tos; u8 reuseaddr; + u8 afonly; }; struct cma_multicast { @@ -188,24 +195,7 @@ struct cma_hdr { union cma_ip_addr dst_addr; }; -struct sdp_hh { - u8 bsdh[16]; - u8 sdp_version; /* Major version: 7:4 */ - u8 ip_version; /* IP version: 7:4 */ - u8 sdp_specific1[10]; - __be16 port; - __be16 sdp_specific2; - union cma_ip_addr src_addr; - union cma_ip_addr dst_addr; -}; - -struct sdp_hah { - u8 bsdh[16]; - u8 sdp_version; -}; - #define CMA_VERSION 0x00 -#define SDP_MAJ_VERSION 0x2 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) { @@ -254,21 +244,6 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); } -static inline u8 sdp_get_majv(u8 sdp_version) -{ - return sdp_version >> 4; -} - -static inline u8 sdp_get_ip_ver(struct sdp_hh *hh) -{ - return hh->ip_version >> 4; -} - -static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver) -{ - hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF); -} - static void cma_attach_to_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { @@ -303,16 +278,40 @@ static void cma_release_dev(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static int cma_set_qkey(struct rdma_id_private *id_priv) +static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.src_addr; +} + +static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) +{ + return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; +} + +static inline unsigned short cma_family(struct rdma_id_private *id_priv) +{ + return id_priv->id.route.addr.src_addr.ss_family; +} + +static int cma_set_qkey(struct rdma_id_private *id_priv, u32 qkey) { struct ib_sa_mcmember_rec rec; int ret = 0; - if (id_priv->qkey) + if (id_priv->qkey) { + if (qkey && id_priv->qkey != qkey) + return -EINVAL; return 0; + } + + if (qkey) { + id_priv->qkey = qkey; + return 0; + } switch (id_priv->id.ps) { case RDMA_PS_UDP: + case RDMA_PS_IB: id_priv->qkey = RDMA_UDP_QKEY; break; case RDMA_PS_IPOIB: @@ -329,35 +328,35 @@ static int cma_set_qkey(struct rdma_id_private *id_priv) return ret; } -static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num) +static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr) { - int i; - int err; - struct ib_port_attr props; - union ib_gid tmp; + dev_addr->dev_type = ARPHRD_INFINIBAND; + rdma_addr_set_sgid(dev_addr, (union ib_gid *) &sib->sib_addr); + ib_addr_set_pkey(dev_addr, ntohs(sib->sib_pkey)); +} - err = ib_query_port(device, port_num, &props); - if (err) - return 1; +static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +{ + int ret; - for (i = 0; i < props.gid_tbl_len; ++i) { - err = ib_query_gid(device, port_num, i, &tmp); - if (err) - return 1; - if (!memcmp(&tmp, gid, sizeof tmp)) - return 0; + if (addr->sa_family != AF_IB) { + ret = rdma_translate_ip(addr, dev_addr, NULL); + } else { + cma_translate_ib((struct sockaddr_ib *) addr, dev_addr); + ret = 0; } - return -EAGAIN; + return ret; } -static int cma_acquire_dev(struct rdma_id_private *id_priv) +static int cma_acquire_dev(struct rdma_id_private *id_priv, + struct rdma_id_private *listen_id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct cma_device *cma_dev; union ib_gid gid, iboe_gid; int ret = -ENODEV; - u8 port; + u8 port, found_port; enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ? IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; @@ -366,23 +365,46 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv) return -EINVAL; mutex_lock(&lock); - iboe_addr_get_sgid(dev_addr, &iboe_gid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &iboe_gid); + memcpy(&gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof gid); + if (listen_id_priv && + rdma_port_get_link_layer(listen_id_priv->id.device, + listen_id_priv->id.port_num) == dev_ll) { + cma_dev = listen_id_priv->cma_dev; + port = listen_id_priv->id.port_num; + if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, + &found_port, NULL); + else + ret = ib_find_cached_gid(cma_dev->device, &gid, + &found_port, NULL); + + if (!ret && (port == found_port)) { + id_priv->id.port_num = found_port; + goto out; + } + } list_for_each_entry(cma_dev, &dev_list, list) { for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) { + if (listen_id_priv && + listen_id_priv->cma_dev == cma_dev && + listen_id_priv->id.port_num == port) + continue; if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) { if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB && rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET) - ret = find_gid_port(cma_dev->device, &iboe_gid, port); + ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL); else - ret = find_gid_port(cma_dev->device, &gid, port); + ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL); - if (!ret) { - id_priv->id.port_num = port; + if (!ret && (port == found_port)) { + id_priv->id.port_num = found_port; goto out; - } else if (ret == 1) - break; + } } } } @@ -395,6 +417,60 @@ out: return ret; } +/* + * Select the source IB device and address to reach the destination IB address. + */ +static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) +{ + struct cma_device *cma_dev, *cur_dev; + struct sockaddr_ib *addr; + union ib_gid gid, sgid, *dgid; + u16 pkey, index; + u8 p; + int i; + + cma_dev = NULL; + addr = (struct sockaddr_ib *) cma_dst_addr(id_priv); + dgid = (union ib_gid *) &addr->sib_addr; + pkey = ntohs(addr->sib_pkey); + + list_for_each_entry(cur_dev, &dev_list, list) { + if (rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + continue; + + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index)) + continue; + + for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) { + if (!memcmp(&gid, dgid, sizeof(gid))) { + cma_dev = cur_dev; + sgid = gid; + id_priv->id.port_num = p; + goto found; + } + + if (!cma_dev && (gid.global.subnet_prefix == + dgid->global.subnet_prefix)) { + cma_dev = cur_dev; + sgid = gid; + id_priv->id.port_num = p; + } + } + } + } + + if (!cma_dev) + return -ENODEV; + +found: + cma_attach_to_dev(id_priv, cma_dev); + addr = (struct sockaddr_ib *) cma_src_addr(id_priv); + memcpy(&addr->sib_addr, &sgid, sizeof sgid); + cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr); + return 0; +} + static void cma_deref_id(struct rdma_id_private *id_priv) { if (atomic_dec_and_test(&id_priv->refcount)) @@ -529,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; + union ib_gid sgid; mutex_lock(&id_priv->qp_mutex); if (!id_priv->id.qp) { @@ -551,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv, if (ret) goto out; + ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num, + qp_attr.ah_attr.grh.sgid_index, &sgid); + if (ret) + goto out; + + if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) + == RDMA_TRANSPORT_IB && + rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) + == IB_LINK_LAYER_ETHERNET) { + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL); + + if (ret) + goto out; + } if (conn_param) qp_attr.max_dest_rd_atomic = conn_param->responder_resources; ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); @@ -624,7 +715,7 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT; if (id_priv->id.qp_type == IB_QPT_UD) { - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, 0); if (ret) return ret; @@ -651,6 +742,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr, else ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr, qp_attr_mask); + if (qp_attr->qp_state == IB_QPS_RTR) qp_attr->rq_psn = id_priv->seq_num; break; @@ -673,26 +765,30 @@ EXPORT_SYMBOL(rdma_init_qp_attr); static inline int cma_zero_addr(struct sockaddr *addr) { - struct in6_addr *ip6; - - if (addr->sa_family == AF_INET) - return ipv4_is_zeronet( - ((struct sockaddr_in *)addr)->sin_addr.s_addr); - else { - ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; - return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | - ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; } } static inline int cma_loopback_addr(struct sockaddr *addr) { - if (addr->sa_family == AF_INET) - return ipv4_is_loopback( - ((struct sockaddr_in *) addr)->sin_addr.s_addr); - else - return ipv6_addr_loopback( - &((struct sockaddr_in6 *) addr)->sin6_addr); + switch (addr->sa_family) { + case AF_INET: + return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr); + case AF_INET6: + return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr); + case AF_IB: + return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr); + default: + return 0; + } } static inline int cma_any_addr(struct sockaddr *addr) @@ -709,18 +805,31 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) case AF_INET: return ((struct sockaddr_in *) src)->sin_addr.s_addr != ((struct sockaddr_in *) dst)->sin_addr.s_addr; - default: + case AF_INET6: return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr, &((struct sockaddr_in6 *) dst)->sin6_addr); + default: + return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr, + &((struct sockaddr_ib *) dst)->sib_addr); } } -static inline __be16 cma_port(struct sockaddr *addr) +static __be16 cma_port(struct sockaddr *addr) { - if (addr->sa_family == AF_INET) + struct sockaddr_ib *sib; + + switch (addr->sa_family) { + case AF_INET: return ((struct sockaddr_in *) addr)->sin_port; - else + case AF_INET6: return ((struct sockaddr_in6 *) addr)->sin6_port; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + return htons((u16) (be64_to_cpu(sib->sib_sid) & + be64_to_cpu(sib->sib_sid_mask))); + default: + return 0; + } } static inline int cma_any_port(struct sockaddr *addr) @@ -728,83 +837,93 @@ static inline int cma_any_port(struct sockaddr *addr) return !cma_port(addr); } -static int cma_get_net_info(void *hdr, enum rdma_port_space ps, - u8 *ip_ver, __be16 *port, - union cma_ip_addr **src, union cma_ip_addr **dst) +static void cma_save_ib_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct ib_sa_path_rec *path) { - switch (ps) { - case RDMA_PS_SDP: - if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) != - SDP_MAJ_VERSION) - return -EINVAL; + struct sockaddr_ib *listen_ib, *ib; - *ip_ver = sdp_get_ip_ver(hdr); - *port = ((struct sdp_hh *) hdr)->port; - *src = &((struct sdp_hh *) hdr)->src_addr; - *dst = &((struct sdp_hh *) hdr)->dst_addr; - break; - default: - if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION) - return -EINVAL; - - *ip_ver = cma_get_ip_ver(hdr); - *port = ((struct cma_hdr *) hdr)->port; - *src = &((struct cma_hdr *) hdr)->src_addr; - *dst = &((struct cma_hdr *) hdr)->dst_addr; - break; - } + listen_ib = (struct sockaddr_ib *) &listen_id->route.addr.src_addr; + ib = (struct sockaddr_ib *) &id->route.addr.src_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->sgid, 16); + ib->sib_sid = listen_ib->sib_sid; + ib->sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL); + ib->sib_scope_id = listen_ib->sib_scope_id; - if (*ip_ver != 4 && *ip_ver != 6) - return -EINVAL; - return 0; + ib = (struct sockaddr_ib *) &id->route.addr.dst_addr; + ib->sib_family = listen_ib->sib_family; + ib->sib_pkey = path->pkey; + ib->sib_flowinfo = path->flow_label; + memcpy(&ib->sib_addr, &path->dgid, 16); } -static void cma_save_net_info(struct rdma_addr *addr, - struct rdma_addr *listen_addr, - u8 ip_ver, __be16 port, - union cma_ip_addr *src, union cma_ip_addr *dst) +static void cma_save_ip4_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct cma_hdr *hdr) { struct sockaddr_in *listen4, *ip4; + + listen4 = (struct sockaddr_in *) &listen_id->route.addr.src_addr; + ip4 = (struct sockaddr_in *) &id->route.addr.src_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr; + ip4->sin_port = listen4->sin_port; + + ip4 = (struct sockaddr_in *) &id->route.addr.dst_addr; + ip4->sin_family = listen4->sin_family; + ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr; + ip4->sin_port = hdr->port; +} + +static void cma_save_ip6_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct cma_hdr *hdr) +{ struct sockaddr_in6 *listen6, *ip6; - switch (ip_ver) { + listen6 = (struct sockaddr_in6 *) &listen_id->route.addr.src_addr; + ip6 = (struct sockaddr_in6 *) &id->route.addr.src_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = hdr->dst_addr.ip6; + ip6->sin6_port = listen6->sin6_port; + + ip6 = (struct sockaddr_in6 *) &id->route.addr.dst_addr; + ip6->sin6_family = listen6->sin6_family; + ip6->sin6_addr = hdr->src_addr.ip6; + ip6->sin6_port = hdr->port; +} + +static int cma_save_net_info(struct rdma_cm_id *id, struct rdma_cm_id *listen_id, + struct ib_cm_event *ib_event) +{ + struct cma_hdr *hdr; + + if ((listen_id->route.addr.src_addr.ss_family == AF_IB) && + (ib_event->event == IB_CM_REQ_RECEIVED)) { + cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path); + return 0; + } + + hdr = ib_event->private_data; + if (hdr->cma_version != CMA_VERSION) + return -EINVAL; + + switch (cma_get_ip_ver(hdr)) { case 4: - listen4 = (struct sockaddr_in *) &listen_addr->src_addr; - ip4 = (struct sockaddr_in *) &addr->src_addr; - ip4->sin_family = listen4->sin_family; - ip4->sin_addr.s_addr = dst->ip4.addr; - ip4->sin_port = listen4->sin_port; - - ip4 = (struct sockaddr_in *) &addr->dst_addr; - ip4->sin_family = listen4->sin_family; - ip4->sin_addr.s_addr = src->ip4.addr; - ip4->sin_port = port; + cma_save_ip4_info(id, listen_id, hdr); break; case 6: - listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr; - ip6 = (struct sockaddr_in6 *) &addr->src_addr; - ip6->sin6_family = listen6->sin6_family; - ip6->sin6_addr = dst->ip6; - ip6->sin6_port = listen6->sin6_port; - - ip6 = (struct sockaddr_in6 *) &addr->dst_addr; - ip6->sin6_family = listen6->sin6_family; - ip6->sin6_addr = src->ip6; - ip6->sin6_port = port; + cma_save_ip6_info(id, listen_id, hdr); break; default: - break; + return -EINVAL; } + return 0; } -static inline int cma_user_data_offset(enum rdma_port_space ps) +static inline int cma_user_data_offset(struct rdma_id_private *id_priv) { - switch (ps) { - case RDMA_PS_SDP: - return 0; - default: - return sizeof(struct cma_hdr); - } + return cma_family(id_priv) == AF_IB ? 0 : sizeof(struct cma_hdr); } static void cma_cancel_route(struct rdma_id_private *id_priv) @@ -855,8 +974,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, cma_cancel_route(id_priv); break; case RDMA_CM_LISTEN: - if (cma_any_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr) - && !id_priv->cma_dev) + if (cma_any_addr(cma_src_addr(id_priv)) && !id_priv->cma_dev) cma_cancel_listens(id_priv); break; default: @@ -971,16 +1089,6 @@ reject: return ret; } -static int cma_verify_rep(struct rdma_id_private *id_priv, void *data) -{ - if (id_priv->id.ps == RDMA_PS_SDP && - sdp_get_majv(((struct sdp_hah *) data)->sdp_version) != - SDP_MAJ_VERSION) - return -EINVAL; - - return 0; -} - static void cma_set_rep_event_data(struct rdma_cm_event *event, struct ib_cm_rep_event_param *rep_data, void *private_data) @@ -1015,15 +1123,13 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: - event.status = cma_verify_rep(id_priv, ib_event->private_data); - if (event.status) - event.event = RDMA_CM_EVENT_CONNECT_ERROR; - else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) { + if (id_priv->id.qp) { event.status = cma_rep_recv(id_priv); event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR : RDMA_CM_EVENT_ESTABLISHED; - } else + } else { event.event = RDMA_CM_EVENT_CONNECT_RESPONSE; + } cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd, ib_event->private_data); break; @@ -1079,22 +1185,16 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; struct rdma_route *rt; - union cma_ip_addr *src, *dst; - __be16 port; - u8 ip_ver; int ret; - if (cma_get_net_info(ib_event->private_data, listen_id->ps, - &ip_ver, &port, &src, &dst)) - return NULL; - id = rdma_create_id(listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; - cma_save_net_info(&id->route.addr, &listen_id->route.addr, - ip_ver, port, src, dst); + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info(id, listen_id, ib_event)) + goto err; rt = &id->route; rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; @@ -1107,19 +1207,17 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (rt->num_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; - if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { + if (cma_any_addr(cma_src_addr(id_priv))) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { - ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, - &rt->addr.dev_addr); + ret = cma_translate_addr(cma_src_addr(id_priv), &rt->addr.dev_addr); if (ret) goto err; } rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid); - id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; @@ -1133,9 +1231,6 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, { struct rdma_id_private *id_priv; struct rdma_cm_id *id; - union cma_ip_addr *src, *dst; - __be16 port; - u8 ip_ver; int ret; id = rdma_create_id(listen_id->event_handler, listen_id->context, @@ -1143,22 +1238,16 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, if (IS_ERR(id)) return NULL; - - if (cma_get_net_info(ib_event->private_data, listen_id->ps, - &ip_ver, &port, &src, &dst)) + id_priv = container_of(id, struct rdma_id_private, id); + if (cma_save_net_info(id, listen_id, ib_event)) goto err; - cma_save_net_info(&id->route.addr, &listen_id->route.addr, - ip_ver, port, src, dst); - if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) { - ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr, - &id->route.addr.dev_addr); + ret = cma_translate_addr(cma_src_addr(id_priv), &id->route.addr.dev_addr); if (ret) goto err; } - id_priv = container_of(id, struct rdma_id_private, id); id_priv->state = RDMA_CM_CONNECT; return id_priv; err: @@ -1183,7 +1272,7 @@ static void cma_set_req_event_data(struct rdma_cm_event *event, static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event) { - return (((ib_event->event == IB_CM_REQ_RECEIVED) || + return (((ib_event->event == IB_CM_REQ_RECEIVED) && (ib_event->param.req_rcvd.qp_type == id->qp_type)) || ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) && (id->qp_type == IB_QPT_UD)) || @@ -1204,7 +1293,7 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) return -ECONNABORTED; memset(&event, 0, sizeof event); - offset = cma_user_data_offset(listen_id->id.ps); + offset = cma_user_data_offset(listen_id); event.event = RDMA_CM_EVENT_CONNECT_REQUEST; if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { conn_id = cma_new_udp_id(&listen_id->id, ib_event); @@ -1218,13 +1307,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) } if (!conn_id) { ret = -ENOMEM; - goto out; + goto err1; } mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) - goto release_conn_id; + goto err2; conn_id->cm_id.ib = cm_id; cm_id->context = conn_id; @@ -1236,79 +1325,71 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) */ atomic_inc(&conn_id->refcount); ret = conn_id->id.event_handler(&conn_id->id, &event); - if (!ret) { - /* - * Acquire mutex to prevent user executing rdma_destroy_id() - * while we're accessing the cm_id. - */ - mutex_lock(&lock); - if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) - ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); - mutex_unlock(&lock); - mutex_unlock(&conn_id->handler_mutex); - cma_deref_id(conn_id); - goto out; - } + if (ret) + goto err3; + /* + * Acquire mutex to prevent user executing rdma_destroy_id() + * while we're accessing the cm_id. + */ + mutex_lock(&lock); + if (cma_comp(conn_id, RDMA_CM_CONNECT) && + (conn_id->id.qp_type != IB_QPT_UD)) + ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); + mutex_unlock(&lock); + mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); + return 0; +err3: + cma_deref_id(conn_id); /* Destroy the CM ID by returning a non-zero value. */ conn_id->cm_id.ib = NULL; - -release_conn_id: +err2: cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(&conn_id->id); - -out: +err1: mutex_unlock(&listen_id->handler_mutex); + if (conn_id) + rdma_destroy_id(&conn_id->id); return ret; } -static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr) +__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr) { - return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr))); + if (addr->sa_family == AF_IB) + return ((struct sockaddr_ib *) addr)->sib_sid; + + return cpu_to_be64(((u64)id->ps << 16) + be16_to_cpu(cma_port(addr))); } +EXPORT_SYMBOL(rdma_get_service_id); static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr, struct ib_cm_compare_data *compare) { struct cma_hdr *cma_data, *cma_mask; - struct sdp_hh *sdp_data, *sdp_mask; __be32 ip4_addr; struct in6_addr ip6_addr; memset(compare, 0, sizeof *compare); cma_data = (void *) compare->data; cma_mask = (void *) compare->mask; - sdp_data = (void *) compare->data; - sdp_mask = (void *) compare->mask; switch (addr->sa_family) { case AF_INET: ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - if (ps == RDMA_PS_SDP) { - sdp_set_ip_ver(sdp_data, 4); - sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip4.addr = ip4_addr; - sdp_mask->dst_addr.ip4.addr = htonl(~0); - } else { - cma_set_ip_ver(cma_data, 4); - cma_set_ip_ver(cma_mask, 0xF); + cma_set_ip_ver(cma_data, 4); + cma_set_ip_ver(cma_mask, 0xF); + if (!cma_any_addr(addr)) { cma_data->dst_addr.ip4.addr = ip4_addr; cma_mask->dst_addr.ip4.addr = htonl(~0); } break; case AF_INET6: ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr; - if (ps == RDMA_PS_SDP) { - sdp_set_ip_ver(sdp_data, 6); - sdp_set_ip_ver(sdp_mask, 0xF); - sdp_data->dst_addr.ip6 = ip6_addr; - memset(&sdp_mask->dst_addr.ip6, 0xFF, - sizeof sdp_mask->dst_addr.ip6); - } else { - cma_set_ip_ver(cma_data, 6); - cma_set_ip_ver(cma_mask, 0xF); + cma_set_ip_ver(cma_data, 6); + cma_set_ip_ver(cma_mask, 0xF); + if (!cma_any_addr(addr)) { cma_data->dst_addr.ip6 = ip6_addr; memset(&cma_mask->dst_addr.ip6, 0xFF, sizeof cma_mask->dst_addr.ip6); @@ -1323,8 +1404,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) { struct rdma_id_private *id_priv = iw_id->context; struct rdma_cm_event event; - struct sockaddr_in *sin; int ret = 0; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) return 0; @@ -1335,10 +1417,10 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) event.event = RDMA_CM_EVENT_DISCONNECTED; break; case IW_CM_EVENT_CONNECT_REPLY: - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; - *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr; - *sin = iw_event->remote_addr; + memcpy(cma_src_addr(id_priv), laddr, + rdma_addr_size(laddr)); + memcpy(cma_dst_addr(id_priv), raddr, + rdma_addr_size(raddr)); switch (iw_event->status) { case 0: event.event = RDMA_CM_EVENT_ESTABLISHED; @@ -1388,11 +1470,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, { struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; - struct sockaddr_in *sin; - struct net_device *dev = NULL; struct rdma_cm_event event; int ret; struct ib_device_attr attr; + struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; + struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) @@ -1410,21 +1492,14 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; - dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); - if (!dev) { - ret = -EADDRNOTAVAIL; - mutex_unlock(&conn_id->handler_mutex); - rdma_destroy_id(new_cm_id); - goto out; - } - ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL); + ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); goto out; } - ret = cma_acquire_dev(conn_id); + ret = cma_acquire_dev(conn_id, listen_id); if (ret) { mutex_unlock(&conn_id->handler_mutex); rdma_destroy_id(new_cm_id); @@ -1435,10 +1510,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cm_id->context = conn_id; cm_id->cm_handler = cma_iw_handler; - sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr; - *sin = iw_event->local_addr; - sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr; - *sin = iw_event->remote_addr; + memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr)); + memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr)); ret = ib_query_device(conn_id->id.device, &attr); if (ret) { @@ -1474,8 +1547,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, cma_deref_id(conn_id); out: - if (dev) - dev_put(dev); mutex_unlock(&listen_id->handler_mutex); return ret; } @@ -1494,9 +1565,9 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) id_priv->cm_id.ib = id; - addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - svc_id = cma_get_service_id(id_priv->id.ps, addr); - if (cma_any_addr(addr)) + addr = cma_src_addr(id_priv); + svc_id = rdma_get_service_id(&id_priv->id, addr); + if (cma_any_addr(addr) && !id_priv->afonly) ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); else { cma_set_compare_data(id_priv->id.ps, addr, &compare_data); @@ -1514,7 +1585,6 @@ static int cma_ib_listen(struct rdma_id_private *id_priv) static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) { int ret; - struct sockaddr_in *sin; struct iw_cm_id *id; id = iw_create_cm_id(id_priv->id.device, @@ -1525,8 +1595,8 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog) id_priv->cm_id.iw = id; - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; - id_priv->cm_id.iw->local_addr = *sin; + memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); ret = iw_cm_listen(id_priv->cm_id.iw, backlog); @@ -1555,6 +1625,10 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct rdma_cm_id *id; int ret; + if (cma_family(id_priv) == AF_IB && + rdma_node_get_transport(cma_dev->device->node_type) != RDMA_TRANSPORT_IB) + return; + id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) @@ -1563,13 +1637,14 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; - memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, - ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); + memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); cma_attach_to_dev(dev_id_priv, cma_dev); list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); atomic_inc(&id_priv->refcount); dev_id_priv->internal_id = 1; + dev_id_priv->afonly = id_priv->afonly; ret = rdma_listen(id, id_priv->backlog); if (ret) @@ -1621,31 +1696,39 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec, static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms, struct cma_work *work) { - struct rdma_addr *addr = &id_priv->id.route.addr; + struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; struct ib_sa_path_rec path_rec; ib_sa_comp_mask comp_mask; struct sockaddr_in6 *sin6; + struct sockaddr_ib *sib; memset(&path_rec, 0, sizeof path_rec); - rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid); - rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid); - path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr)); + rdma_addr_get_sgid(dev_addr, &path_rec.sgid); + rdma_addr_get_dgid(dev_addr, &path_rec.dgid); + path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); path_rec.numb_path = 1; path_rec.reversible = 1; - path_rec.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &addr->dst_addr); + path_rec.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); comp_mask = IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH | IB_SA_PATH_REC_REVERSIBLE | IB_SA_PATH_REC_SERVICE_ID; - if (addr->src_addr.ss_family == AF_INET) { + switch (cma_family(id_priv)) { + case AF_INET: path_rec.qos_class = cpu_to_be16((u16) id_priv->tos); comp_mask |= IB_SA_PATH_REC_QOS_CLASS; - } else { - sin6 = (struct sockaddr_in6 *) &addr->src_addr; + break; + case AF_INET6: + sin6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); path_rec.traffic_class = (u8) (be32_to_cpu(sin6->sin6_flowinfo) >> 20); comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; + case AF_IB: + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + path_rec.traffic_class = (u8) (be32_to_cpu(sib->sib_flowinfo) >> 20); + comp_mask |= IB_SA_PATH_REC_TRAFFIC_CLASS; + break; } id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device, @@ -1781,19 +1864,34 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms) return 0; } +static int iboe_tos_to_sl(struct net_device *ndev, int tos) +{ + int prio; + struct net_device *dev; + + prio = rt_tos2priority(tos); + dev = ndev->priv_flags & IFF_802_1Q_VLAN ? + vlan_dev_real_dev(ndev) : ndev; + + if (dev->num_tc) + return netdev_get_prio_tc_map(dev, prio); + +#if IS_ENABLED(CONFIG_VLAN_8021Q) + if (ndev->priv_flags & IFF_802_1Q_VLAN) + return (vlan_dev_get_egress_qos_mask(ndev, prio) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; +#endif + return 0; +} + static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) { struct rdma_route *route = &id_priv->id.route; struct rdma_addr *addr = &route->addr; struct cma_work *work; int ret; - struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr; - struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr; struct net_device *ndev = NULL; - u16 vid; - if (src_addr->sin_family != dst_addr->sin_family) - return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) @@ -1817,17 +1915,20 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err2; } - vid = rdma_vlan_dev_vlan_id(ndev); + route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev); + memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN); + memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len); - iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid); - iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &route->path_rec->sgid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr, + &route->path_rec->dgid); route->path_rec->hop_limit = 1; route->path_rec->reversible = 1; route->path_rec->pkey = cpu_to_be16(0xffff); route->path_rec->mtu_selector = IB_SA_EQ; - route->path_rec->sl = id_priv->tos >> 5; - + route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos); route->path_rec->mtu = iboe_get_mtu(ndev->mtu); route->path_rec->rate_selector = IB_SA_EQ; route->path_rec->rate = iboe_get_rate(ndev); @@ -1897,28 +1998,57 @@ err: } EXPORT_SYMBOL(rdma_resolve_route); +static void cma_set_loopback(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + break; + case AF_INET6: + ipv6_addr_set(&((struct sockaddr_in6 *) addr)->sin6_addr, + 0, 0, 0, htonl(1)); + break; + default: + ib_addr_set(&((struct sockaddr_ib *) addr)->sib_addr, + 0, 0, 0, htonl(1)); + break; + } +} + static int cma_bind_loopback(struct rdma_id_private *id_priv) { - struct cma_device *cma_dev; + struct cma_device *cma_dev, *cur_dev; struct ib_port_attr port_attr; union ib_gid gid; u16 pkey; int ret; u8 p; + cma_dev = NULL; mutex_lock(&lock); - if (list_empty(&dev_list)) { + list_for_each_entry(cur_dev, &dev_list, list) { + if (cma_family(id_priv) == AF_IB && + rdma_node_get_transport(cur_dev->device->node_type) != RDMA_TRANSPORT_IB) + continue; + + if (!cma_dev) + cma_dev = cur_dev; + + for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) { + if (!ib_query_port(cur_dev->device, p, &port_attr) && + port_attr.state == IB_PORT_ACTIVE) { + cma_dev = cur_dev; + goto port_found; + } + } + } + + if (!cma_dev) { ret = -ENODEV; goto out; } - list_for_each_entry(cma_dev, &dev_list, list) - for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p) - if (!ib_query_port(cma_dev->device, p, &port_attr) && - port_attr.state == IB_PORT_ACTIVE) - goto port_found; p = 1; - cma_dev = list_entry(dev_list.next, struct cma_device, list); port_found: ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid); @@ -1937,6 +2067,7 @@ port_found: ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey); id_priv->id.port_num = p; cma_attach_to_dev(id_priv, cma_dev); + cma_set_loopback(cma_src_addr(id_priv)); out: mutex_unlock(&lock); return ret; @@ -1954,8 +2085,9 @@ static void addr_handler(int status, struct sockaddr *src_addr, RDMA_CM_ADDR_RESOLVED)) goto out; + memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr)); if (!status && !id_priv->cma_dev) - status = cma_acquire_dev(id_priv); + status = cma_acquire_dev(id_priv, NULL); if (status) { if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED, @@ -1963,11 +2095,8 @@ static void addr_handler(int status, struct sockaddr *src_addr, goto out; event.event = RDMA_CM_EVENT_ADDR_ERROR; event.status = status; - } else { - memcpy(&id_priv->id.route.addr.src_addr, src_addr, - ip_addr_size(src_addr)); + } else event.event = RDMA_CM_EVENT_ADDR_RESOLVED; - } if (id_priv->id.event_handler(&id_priv->id, &event)) { cma_exch(id_priv, RDMA_CM_DESTROYING); @@ -1984,7 +2113,6 @@ out: static int cma_resolve_loopback(struct rdma_id_private *id_priv) { struct cma_work *work; - struct sockaddr *src, *dst; union ib_gid gid; int ret; @@ -2001,18 +2129,36 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid); rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid); - src = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - if (cma_zero_addr(src)) { - dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; - if ((src->sa_family = dst->sa_family) == AF_INET) { - ((struct sockaddr_in *)src)->sin_addr = - ((struct sockaddr_in *)dst)->sin_addr; - } else { - ((struct sockaddr_in6 *)src)->sin6_addr = - ((struct sockaddr_in6 *)dst)->sin6_addr; - } + work->id = id_priv; + INIT_WORK(&work->work, cma_work_handler); + work->old_state = RDMA_CM_ADDR_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED; + queue_work(cma_wq, &work->work); + return 0; +err: + kfree(work); + return ret; +} + +static int cma_resolve_ib_addr(struct rdma_id_private *id_priv) +{ + struct cma_work *work; + int ret; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (!id_priv->cma_dev) { + ret = cma_resolve_ib_dev(id_priv); + if (ret) + goto err; } + rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, (union ib_gid *) + &(((struct sockaddr_ib *) &id_priv->id.route.addr.dst_addr)->sib_addr)); + work->id = id_priv; INIT_WORK(&work->work, cma_work_handler); work->old_state = RDMA_CM_ADDR_QUERY; @@ -2030,9 +2176,13 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, { if (!src_addr || !src_addr->sa_family) { src_addr = (struct sockaddr *) &id->route.addr.src_addr; - if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) { + src_addr->sa_family = dst_addr->sa_family; + if (dst_addr->sa_family == AF_INET6) { ((struct sockaddr_in6 *) src_addr)->sin6_scope_id = ((struct sockaddr_in6 *) dst_addr)->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *) src_addr)->sib_pkey = + ((struct sockaddr_ib *) dst_addr)->sib_pkey; } } return rdma_bind_addr(id, src_addr); @@ -2051,17 +2201,25 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return ret; } + if (cma_family(id_priv) != dst_addr->sa_family) + return -EINVAL; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) return -EINVAL; atomic_inc(&id_priv->refcount); - memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr)); - if (cma_any_addr(dst_addr)) + memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); + if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); - else - ret = rdma_resolve_ip(&addr_client, (struct sockaddr *) &id->route.addr.src_addr, - dst_addr, &id->route.addr.dev_addr, - timeout_ms, addr_handler, id_priv); + } else { + if (dst_addr->sa_family == AF_IB) { + ret = cma_resolve_ib_addr(id_priv); + } else { + ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv), + dst_addr, &id->route.addr.dev_addr, + timeout_ms, addr_handler, id_priv); + } + } if (ret) goto err; @@ -2081,7 +2239,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); - if (id_priv->state == RDMA_CM_IDLE) { + if (reuse || id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { @@ -2092,13 +2250,52 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) } EXPORT_SYMBOL(rdma_set_reuseaddr); +int rdma_set_afonly(struct rdma_cm_id *id, int afonly) +{ + struct rdma_id_private *id_priv; + unsigned long flags; + int ret; + + id_priv = container_of(id, struct rdma_id_private, id); + spin_lock_irqsave(&id_priv->lock, flags); + if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) { + id_priv->options |= (1 << CMA_OPTION_AFONLY); + id_priv->afonly = afonly; + ret = 0; + } else { + ret = -EINVAL; + } + spin_unlock_irqrestore(&id_priv->lock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_set_afonly); + static void cma_bind_port(struct rdma_bind_list *bind_list, struct rdma_id_private *id_priv) { - struct sockaddr_in *sin; + struct sockaddr *addr; + struct sockaddr_ib *sib; + u64 sid, mask; + __be16 port; + + addr = cma_src_addr(id_priv); + port = htons(bind_list->port); - sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr; - sin->sin_port = htons(bind_list->port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *) addr)->sin_port = port; + break; + case AF_INET6: + ((struct sockaddr_in6 *) addr)->sin6_port = port; + break; + case AF_IB: + sib = (struct sockaddr_ib *) addr; + sid = be64_to_cpu(sib->sib_sid); + mask = be64_to_cpu(sib->sib_sid_mask); + sib->sib_sid = cpu_to_be64((sid & mask) | (u64) ntohs(port)); + sib->sib_sid_mask = cpu_to_be64(~0ULL); + break; + } id_priv->bind_list = bind_list; hlist_add_head(&id_priv->node, &bind_list->owners); } @@ -2107,33 +2304,23 @@ static int cma_alloc_port(struct idr *ps, struct rdma_id_private *id_priv, unsigned short snum) { struct rdma_bind_list *bind_list; - int port, ret; + int ret; bind_list = kzalloc(sizeof *bind_list, GFP_KERNEL); if (!bind_list) return -ENOMEM; - do { - ret = idr_get_new_above(ps, bind_list, snum, &port); - } while ((ret == -EAGAIN) && idr_pre_get(ps, GFP_KERNEL)); - - if (ret) - goto err1; - - if (port != snum) { - ret = -EADDRNOTAVAIL; - goto err2; - } + ret = idr_alloc(ps, bind_list, snum, snum + 1, GFP_KERNEL); + if (ret < 0) + goto err; bind_list->ps = ps; - bind_list->port = (unsigned short) port; + bind_list->port = (unsigned short)ret; cma_bind_port(bind_list, id_priv); return 0; -err2: - idr_remove(ps, port); -err1: +err: kfree(bind_list); - return ret; + return ret == -ENOSPC ? -EADDRNOTAVAIL : ret; } static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) @@ -2142,9 +2329,9 @@ static int cma_alloc_any_port(struct idr *ps, struct rdma_id_private *id_priv) int low, high, remaining; unsigned int rover; - inet_get_local_port_range(&low, &high); + inet_get_local_port_range(&init_net, &low, &high); remaining = (high - low) + 1; - rover = net_random() % remaining + low; + rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && !idr_find(ps, (unsigned short) rover)) { @@ -2178,25 +2365,26 @@ static int cma_check_port(struct rdma_bind_list *bind_list, { struct rdma_id_private *cur_id; struct sockaddr *addr, *cur_addr; - struct hlist_node *node; - - addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; - if (cma_any_addr(addr) && !reuseaddr) - return -EADDRNOTAVAIL; - hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { + addr = cma_src_addr(id_priv); + hlist_for_each_entry(cur_id, &bind_list->owners, node) { if (id_priv == cur_id) continue; - if ((cur_id->state == RDMA_CM_LISTEN) || - !reuseaddr || !cur_id->reuseaddr) { - cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; - if (cma_any_addr(cur_addr)) - return -EADDRNOTAVAIL; + if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && + cur_id->reuseaddr) + continue; + + cur_addr = cma_src_addr(cur_id); + if (id_priv->afonly && cur_id->afonly && + (addr->sa_family != cur_addr->sa_family)) + continue; + + if (cma_any_addr(addr) || cma_any_addr(cur_addr)) + return -EADDRNOTAVAIL; - if (!cma_addr_cmp(addr, cur_addr)) - return -EADDRINUSE; - } + if (!cma_addr_cmp(addr, cur_addr)) + return -EADDRINUSE; } return 0; } @@ -2207,7 +2395,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv) unsigned short snum; int ret; - snum = ntohs(cma_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)); + snum = ntohs(cma_port(cma_src_addr(id_priv))); if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; @@ -2234,33 +2422,67 @@ static int cma_bind_listen(struct rdma_id_private *id_priv) return ret; } -static int cma_get_port(struct rdma_id_private *id_priv) +static struct idr *cma_select_inet_ps(struct rdma_id_private *id_priv) { - struct idr *ps; - int ret; - switch (id_priv->id.ps) { - case RDMA_PS_SDP: - ps = &sdp_ps; - break; case RDMA_PS_TCP: - ps = &tcp_ps; - break; + return &tcp_ps; case RDMA_PS_UDP: - ps = &udp_ps; - break; + return &udp_ps; case RDMA_PS_IPOIB: - ps = &ipoib_ps; - break; + return &ipoib_ps; case RDMA_PS_IB: - ps = &ib_ps; - break; + return &ib_ps; default: - return -EPROTONOSUPPORT; + return NULL; + } +} + +static struct idr *cma_select_ib_ps(struct rdma_id_private *id_priv) +{ + struct idr *ps = NULL; + struct sockaddr_ib *sib; + u64 sid_ps, mask, sid; + + sib = (struct sockaddr_ib *) cma_src_addr(id_priv); + mask = be64_to_cpu(sib->sib_sid_mask) & RDMA_IB_IP_PS_MASK; + sid = be64_to_cpu(sib->sib_sid) & mask; + + if ((id_priv->id.ps == RDMA_PS_IB) && (sid == (RDMA_IB_IP_PS_IB & mask))) { + sid_ps = RDMA_IB_IP_PS_IB; + ps = &ib_ps; + } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_TCP)) && + (sid == (RDMA_IB_IP_PS_TCP & mask))) { + sid_ps = RDMA_IB_IP_PS_TCP; + ps = &tcp_ps; + } else if (((id_priv->id.ps == RDMA_PS_IB) || (id_priv->id.ps == RDMA_PS_UDP)) && + (sid == (RDMA_IB_IP_PS_UDP & mask))) { + sid_ps = RDMA_IB_IP_PS_UDP; + ps = &udp_ps; + } + + if (ps) { + sib->sib_sid = cpu_to_be64(sid_ps | ntohs(cma_port((struct sockaddr *) sib))); + sib->sib_sid_mask = cpu_to_be64(RDMA_IB_IP_PS_MASK | + be64_to_cpu(sib->sib_sid_mask)); } + return ps; +} + +static int cma_get_port(struct rdma_id_private *id_priv) +{ + struct idr *ps; + int ret; + + if (cma_family(id_priv) != AF_IB) + ps = cma_select_inet_ps(id_priv); + else + ps = cma_select_ib_ps(id_priv); + if (!ps) + return -EPROTONOSUPPORT; mutex_lock(&lock); - if (cma_any_port((struct sockaddr *) &id_priv->id.route.addr.src_addr)) + if (cma_any_port(cma_src_addr(id_priv))) ret = cma_alloc_any_port(ps, id_priv); else ret = cma_use_port(ps, id_priv); @@ -2272,15 +2494,18 @@ static int cma_get_port(struct rdma_id_private *id_priv) static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, struct sockaddr *addr) { -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#if IS_ENABLED(CONFIG_IPV6) struct sockaddr_in6 *sin6; if (addr->sa_family != AF_INET6) return 0; sin6 = (struct sockaddr_in6 *) addr; - if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && - !sin6->sin6_scope_id) + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return 0; + + if (!sin6->sin6_scope_id) return -EINVAL; dev_addr->bound_dev_if = sin6->sin6_scope_id; @@ -2295,8 +2520,8 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) id_priv = container_of(id, struct rdma_id_private, id); if (id_priv->state == RDMA_CM_IDLE) { - ((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET; - ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr); + id->route.addr.src_addr.ss_family = AF_INET; + ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; } @@ -2343,7 +2568,8 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) struct rdma_id_private *id_priv; int ret; - if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) + if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 && + addr->sa_family != AF_IB) return -EAFNOSUPPORT; id_priv = container_of(id, struct rdma_id_private, id); @@ -2354,17 +2580,25 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (ret) goto err1; + memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr)); if (!cma_any_addr(addr)) { - ret = rdma_translate_ip(addr, &id->route.addr.dev_addr); + ret = cma_translate_addr(addr, &id->route.addr.dev_addr); if (ret) goto err1; - ret = cma_acquire_dev(id_priv); + ret = cma_acquire_dev(id_priv, NULL); if (ret) goto err1; } - memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); + if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) { + if (addr->sa_family == AF_INET) + id_priv->afonly = 1; +#if IS_ENABLED(CONFIG_IPV6) + else if (addr->sa_family == AF_INET6) + id_priv->afonly = init_net.ipv6.sysctl.bindv6only; +#endif + } ret = cma_get_port(id_priv); if (ret) goto err2; @@ -2379,62 +2613,32 @@ err1: } EXPORT_SYMBOL(rdma_bind_addr); -static int cma_format_hdr(void *hdr, enum rdma_port_space ps, - struct rdma_route *route) +static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv) { struct cma_hdr *cma_hdr; - struct sdp_hh *sdp_hdr; - if (route->addr.src_addr.ss_family == AF_INET) { + cma_hdr = hdr; + cma_hdr->cma_version = CMA_VERSION; + if (cma_family(id_priv) == AF_INET) { struct sockaddr_in *src4, *dst4; - src4 = (struct sockaddr_in *) &route->addr.src_addr; - dst4 = (struct sockaddr_in *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 4); - sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - sdp_hdr->port = src4->sin_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 4); - cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; - cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; - cma_hdr->port = src4->sin_port; - break; - } - } else { + src4 = (struct sockaddr_in *) cma_src_addr(id_priv); + dst4 = (struct sockaddr_in *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 4); + cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr; + cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr; + cma_hdr->port = src4->sin_port; + } else if (cma_family(id_priv) == AF_INET6) { struct sockaddr_in6 *src6, *dst6; - src6 = (struct sockaddr_in6 *) &route->addr.src_addr; - dst6 = (struct sockaddr_in6 *) &route->addr.dst_addr; - - switch (ps) { - case RDMA_PS_SDP: - sdp_hdr = hdr; - if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION) - return -EINVAL; - sdp_set_ip_ver(sdp_hdr, 6); - sdp_hdr->src_addr.ip6 = src6->sin6_addr; - sdp_hdr->dst_addr.ip6 = dst6->sin6_addr; - sdp_hdr->port = src6->sin6_port; - break; - default: - cma_hdr = hdr; - cma_hdr->cma_version = CMA_VERSION; - cma_set_ip_ver(cma_hdr, 6); - cma_hdr->src_addr.ip6 = src6->sin6_addr; - cma_hdr->dst_addr.ip6 = dst6->sin6_addr; - cma_hdr->port = src6->sin6_port; - break; - } + src6 = (struct sockaddr_in6 *) cma_src_addr(id_priv); + dst6 = (struct sockaddr_in6 *) cma_dst_addr(id_priv); + + cma_set_ip_ver(cma_hdr, 6); + cma_hdr->src_addr.ip6 = src6->sin6_addr; + cma_hdr->dst_addr.ip6 = dst6->sin6_addr; + cma_hdr->port = src6->sin6_port; } return 0; } @@ -2464,15 +2668,10 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, event.status = ib_event->param.sidr_rep_rcvd.status; break; } - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, rep->qkey); if (ret) { event.event = RDMA_CM_EVENT_ADDR_ERROR; - event.status = -EINVAL; - break; - } - if (id_priv->qkey != rep->qkey) { - event.event = RDMA_CM_EVENT_UNREACHABLE; - event.status = -EINVAL; + event.status = ret; break; } ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num, @@ -2507,27 +2706,34 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct ib_cm_sidr_req_param req; - struct rdma_route *route; struct ib_cm_id *id; - int ret; + void *private_data; + int offset, ret; - req.private_data_len = sizeof(struct cma_hdr) + - conn_param->private_data_len; + memset(&req, 0, sizeof req); + offset = cma_user_data_offset(id_priv); + req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; - req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!req.private_data) - return -ENOMEM; + if (req.private_data_len) { + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + } else { + private_data = NULL; + } if (conn_param->private_data && conn_param->private_data_len) - memcpy((void *) req.private_data + sizeof(struct cma_hdr), - conn_param->private_data, conn_param->private_data_len); + memcpy(private_data + offset, conn_param->private_data, + conn_param->private_data_len); - route = &id_priv->id.route; - ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route); - if (ret) - goto out; + if (private_data) { + ret = cma_format_hdr(private_data, id_priv); + if (ret) + goto out; + req.private_data = private_data; + } id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler, id_priv); @@ -2537,9 +2743,8 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, } id_priv->cm_id.ib = id; - req.path = route->path_rec; - req.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &route->addr.dst_addr); + req.path = id_priv->id.route.path_rec; + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8); req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2549,7 +2754,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, id_priv->cm_id.ib = NULL; } out: - kfree(req.private_data); + kfree(private_data); return ret; } @@ -2563,14 +2768,18 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, int offset, ret; memset(&req, 0, sizeof req); - offset = cma_user_data_offset(id_priv->id.ps); + offset = cma_user_data_offset(id_priv); req.private_data_len = offset + conn_param->private_data_len; if (req.private_data_len < conn_param->private_data_len) return -EINVAL; - private_data = kzalloc(req.private_data_len, GFP_ATOMIC); - if (!private_data) - return -ENOMEM; + if (req.private_data_len) { + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); + if (!private_data) + return -ENOMEM; + } else { + private_data = NULL; + } if (conn_param->private_data && conn_param->private_data_len) memcpy(private_data + offset, conn_param->private_data, @@ -2584,25 +2793,26 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, id_priv->cm_id.ib = id; route = &id_priv->id.route; - ret = cma_format_hdr(private_data, id_priv->id.ps, route); - if (ret) - goto out; - req.private_data = private_data; + if (private_data) { + ret = cma_format_hdr(private_data, id_priv); + if (ret) + goto out; + req.private_data = private_data; + } req.primary_path = &route->path_rec[0]; if (route->num_paths == 2) req.alternate_path = &route->path_rec[1]; - req.service_id = cma_get_service_id(id_priv->id.ps, - (struct sockaddr *) &route->addr.dst_addr); + req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv)); req.qp_num = id_priv->qp_num; req.qp_type = id_priv->id.qp_type; req.starting_psn = id_priv->seq_num; req.responder_resources = conn_param->responder_resources; req.initiator_depth = conn_param->initiator_depth; req.flow_control = conn_param->flow_control; - req.retry_count = conn_param->retry_count; - req.rnr_retry_count = conn_param->rnr_retry_count; + req.retry_count = min_t(u8, 7, conn_param->retry_count); + req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; req.max_cm_retries = CMA_MAX_CM_RETRIES; @@ -2623,7 +2833,6 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, struct rdma_conn_param *conn_param) { struct iw_cm_id *cm_id; - struct sockaddr_in* sin; int ret; struct iw_cm_conn_param iw_param; @@ -2633,11 +2842,10 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, id_priv->cm_id.iw = cm_id; - sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr; - cm_id->local_addr = *sin; - - sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; - cm_id->remote_addr = *sin; + memcpy(&cm_id->local_addr, cma_src_addr(id_priv), + rdma_addr_size(cma_src_addr(id_priv))); + memcpy(&cm_id->remote_addr, cma_dst_addr(id_priv), + rdma_addr_size(cma_dst_addr(id_priv))); ret = cma_modify_qp_rtr(id_priv, conn_param); if (ret) @@ -2723,7 +2931,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, rep.initiator_depth = conn_param->initiator_depth; rep.failover_accepted = 0; rep.flow_control = conn_param->flow_control; - rep.rnr_retry_count = conn_param->rnr_retry_count; + rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); rep.srq = id_priv->srq ? 1 : 0; ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); @@ -2754,7 +2962,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, } static int cma_send_sidr_rep(struct rdma_id_private *id_priv, - enum ib_cm_sidr_status status, + enum ib_cm_sidr_status status, u32 qkey, const void *private_data, int private_data_len) { struct ib_cm_sidr_rep_param rep; @@ -2763,7 +2971,7 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, memset(&rep, 0, sizeof rep); rep.status = status; if (status == IB_SIDR_SUCCESS) { - ret = cma_set_qkey(id_priv); + ret = cma_set_qkey(id_priv, qkey); if (ret) return ret; rep.qp_num = id_priv->qp_num; @@ -2797,11 +3005,12 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) if (id->qp_type == IB_QPT_UD) { if (conn_param) ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, + conn_param->qkey, conn_param->private_data, conn_param->private_data_len); else ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, - NULL, 0); + 0, NULL, 0); } else { if (conn_param) ret = cma_accept_ib(id_priv, conn_param); @@ -2862,7 +3071,7 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data, switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: if (id->qp_type == IB_QPT_UD) - ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, + ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT, 0, private_data, private_data_len); else ret = ib_send_cm_rej(id_priv->cm_id.ib, @@ -2923,6 +3132,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) return 0; + if (!status) + status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, @@ -2969,6 +3180,8 @@ static void cma_set_mgid(struct rdma_id_private *id_priv, 0xFF10A01B)) { /* IPv6 address is an SA assigned MGID. */ memcpy(mgid, &sin6->sin6_addr, sizeof *mgid); + } else if (addr->sa_family == AF_IB) { + memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid); } else if ((addr->sa_family == AF_INET6)) { ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map); if (id_priv->id.ps == RDMA_PS_UDP) @@ -2996,9 +3209,12 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (ret) return ret; + ret = cma_set_qkey(id_priv, 0); + if (ret) + return ret; + cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid); - if (id_priv->id.ps == RDMA_PS_UDP) - rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + rec.qkey = cpu_to_be32(id_priv->qkey); rdma_addr_get_sgid(dev_addr, &rec.port_gid); rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr)); rec.join_state = 1; @@ -3011,16 +3227,16 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, if (id_priv->id.ps == RDMA_PS_IPOIB) comp_mask |= IB_SA_MCMEMBER_REC_RATE | - IB_SA_MCMEMBER_REC_RATE_SELECTOR; + IB_SA_MCMEMBER_REC_RATE_SELECTOR | + IB_SA_MCMEMBER_REC_MTU_SELECTOR | + IB_SA_MCMEMBER_REC_MTU | + IB_SA_MCMEMBER_REC_HOP_LIMIT; mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, id_priv->id.port_num, &rec, comp_mask, GFP_KERNEL, cma_ib_mc_handler, mc); - if (IS_ERR(mc->multicast.ib)) - return PTR_ERR(mc->multicast.ib); - - return 0; + return PTR_ERR_OR_ZERO(mc->multicast.ib); } static void iboe_mcast_work_handler(struct work_struct *work) @@ -3103,7 +3319,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -EINVAL; goto out2; } - iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid); + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, + &mc->multicast.ib->rec.port_gid); work->id = id_priv; work->mc = mc; INIT_WORK(&work->work, iboe_mcast_work_handler); @@ -3135,7 +3352,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, if (!mc) return -ENOMEM; - memcpy(&mc->addr, addr, ip_addr_size(addr)); + memcpy(&mc->addr, addr, rdma_addr_size(addr)); mc->context = context; mc->id_priv = id_priv; @@ -3180,7 +3397,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) { + if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { list_del(&mc->list); spin_unlock_irq(&id_priv->lock); @@ -3234,9 +3451,9 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id } static int cma_netdev_callback(struct notifier_block *self, unsigned long event, - void *ctx) + void *ptr) { - struct net_device *ndev = (struct net_device *)ctx; + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct cma_device *cma_dev; struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; @@ -3390,7 +3607,8 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq, sizeof *id_stats, RDMA_NL_RDMA_CM, - RDMA_NL_RDMA_CM_ID_STATS); + RDMA_NL_RDMA_CM_ID_STATS, + NLM_F_MULTI); if (!id_stats) goto out; @@ -3401,33 +3619,16 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb) id_stats->bound_dev_if = id->route.addr.dev_addr.bound_dev_if; - if (id->route.addr.src_addr.ss_family == AF_INET) { - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in), - &id->route.addr.src_addr, - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { - goto out; - } - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in), - &id->route.addr.dst_addr, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { - goto out; - } - } else if (id->route.addr.src_addr.ss_family == AF_INET6) { - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in6), - &id->route.addr.src_addr, - RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) { - goto out; - } - if (ibnl_put_attr(skb, nlh, - sizeof(struct sockaddr_in6), - &id->route.addr.dst_addr, - RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) { - goto out; - } - } + if (ibnl_put_attr(skb, nlh, + rdma_addr_size(cma_src_addr(id_priv)), + cma_src_addr(id_priv), + RDMA_NL_RDMA_CM_ATTR_SRC_ADDR)) + goto out; + if (ibnl_put_attr(skb, nlh, + rdma_addr_size(cma_src_addr(id_priv)), + cma_dst_addr(id_priv), + RDMA_NL_RDMA_CM_ATTR_DST_ADDR)) + goto out; id_stats->pid = id_priv->owner; id_stats->port_space = id->ps; @@ -3451,7 +3652,8 @@ out: } static const struct ibnl_client_cbs cma_cb_table[] = { - [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats }, + [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats, + .module = THIS_MODULE }, }; static int __init cma_init(void) @@ -3491,7 +3693,6 @@ static void __exit cma_cleanup(void) rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); - idr_destroy(&sdp_ps); idr_destroy(&tcp_ps); idr_destroy(&udp_ps); idr_destroy(&ipoib_ps); diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a565af5c2d2..87d1936f5c1 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void); int ib_cache_setup(void); void ib_cache_cleanup(void); +int ib_resolve_eth_l2_attrs(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask); #endif /* _CORE_PRIV_H */ diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e711de400a0..18c1ece765f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -707,18 +707,28 @@ int ib_find_pkey(struct ib_device *device, { int ret, i; u16 tmp_pkey; + int partial_ix = -1; for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { ret = ib_query_pkey(device, port_num, i, &tmp_pkey); if (ret) return ret; - if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { - *index = i; - return 0; + /* if there is full-member pkey take it.*/ + if (tmp_pkey & 0x8000) { + *index = i; + return 0; + } + if (partial_ix < 0) + partial_ix = i; } } + /*no full-member, if exists take the limited*/ + if (partial_ix >= 0) { + *index = partial_ix; + return 0; + } return -ENOENT; } EXPORT_SYMBOL(ib_find_pkey); diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c index 176c8f90f2b..9f5ad7cc33c 100644 --- a/drivers/infiniband/core/fmr_pool.c +++ b/drivers/infiniband/core/fmr_pool.c @@ -118,14 +118,13 @@ static inline struct ib_pool_fmr *ib_fmr_cache_lookup(struct ib_fmr_pool *pool, { struct hlist_head *bucket; struct ib_pool_fmr *fmr; - struct hlist_node *pos; if (!pool->cache_bucket) return NULL; bucket = pool->cache_bucket + ib_fmr_hash(*page_list); - hlist_for_each_entry(fmr, pos, bucket, cache_node) + hlist_for_each_entry(fmr, bucket, cache_node) if (io_virtual_address == fmr->io_virtual_address && page_list_len == fmr->page_list_len && !memcmp(page_list, fmr->page_list, diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 1a696f76b61..3d2e489ab73 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -181,9 +181,16 @@ static void add_ref(struct iw_cm_id *cm_id) static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; + int cb_destroy; + cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); - if (iwcm_deref_id(cm_id_priv) && - test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) { + + /* + * Test bit before deref in case the cm_id gets freed on another + * thread. + */ + cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); + if (iwcm_deref_id(cm_id_priv) && cb_destroy) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); } @@ -327,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* @@ -343,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ - ret = cm_id->device->iwcm->destroy_listen(cm_id); + cm_id->device->iwcm->destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: @@ -624,17 +630,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, */ BUG_ON(iw_event->status); - /* - * We could be destroying the listening id. If so, ignore this - * upcall. - */ - spin_lock_irqsave(&listen_id_priv->lock, flags); - if (listen_id_priv->state != IW_CM_STATE_LISTEN) { - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - goto out; - } - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - cm_id = iw_create_cm_id(listen_id_priv->id.device, listen_id_priv->id.cm_handler, listen_id_priv->id.context); @@ -649,6 +644,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); @@ -876,6 +884,8 @@ static void cm_work_handler(struct work_struct *_work) } return; } + if (empty) + return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c new file mode 100644 index 00000000000..b85ddbc979e --- /dev/null +++ b/drivers/infiniband/core/iwpm_msg.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "iwpm_util.h" + +static const char iwpm_ulib_name[] = "iWarpPortMapperUser"; +static int iwpm_ulib_version = 3; +static int iwpm_user_pid = IWPM_PID_UNDEFINED; +static atomic_t echo_nlmsg_seq; + +int iwpm_valid_pid(void) +{ + return iwpm_user_pid > 0; +} +EXPORT_SYMBOL(iwpm_valid_pid); + +/* + * iwpm_register_pid - Send a netlink query to user space + * for the iwarp port mapper pid + * + * nlmsg attributes: + * [IWPM_NLA_REG_PID_SEQ] + * [IWPM_NLA_REG_IF_NAME] + * [IWPM_NLA_REG_IBDEV_NAME] + * [IWPM_NLA_REG_ULIB_NAME] + */ +int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client) +{ + struct sk_buff *skb = NULL; + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + err_str = "Invalid port mapper client"; + goto pid_query_error; + } + if (iwpm_registered_client(nl_client)) + return 0; + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REG_PID, &nlh, nl_client); + if (!skb) { + err_str = "Unable to create a nlmsg"; + goto pid_query_error; + } + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); + if (!nlmsg_request) { + err_str = "Unable to allocate netlink request"; + goto pid_query_error; + } + msg_seq = atomic_read(&echo_nlmsg_seq); + + /* fill in the pid request message */ + err_str = "Unable to put attribute of the nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ); + if (ret) + goto pid_query_error; + ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE, + pm_msg->if_name, IWPM_NLA_REG_IF_NAME); + if (ret) + goto pid_query_error; + ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE, + pm_msg->dev_name, IWPM_NLA_REG_IBDEV_NAME); + if (ret) + goto pid_query_error; + ret = ibnl_put_attr(skb, nlh, IWPM_ULIBNAME_SIZE, + (char *)iwpm_ulib_name, IWPM_NLA_REG_ULIB_NAME); + if (ret) + goto pid_query_error; + + pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n", + __func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name); + + ret = ibnl_multicast(skb, nlh, RDMA_NL_GROUP_IWPM, GFP_KERNEL); + if (ret) { + skb = NULL; /* skb is freed in the netlink send-op handling */ + iwpm_set_registered(nl_client, 1); + iwpm_user_pid = IWPM_PID_UNAVAILABLE; + err_str = "Unable to send a nlmsg"; + goto pid_query_error; + } + nlmsg_request->req_buffer = pm_msg; + ret = iwpm_wait_complete_req(nlmsg_request); + return ret; +pid_query_error: + pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); + if (skb) + dev_kfree_skb(skb); + if (nlmsg_request) + iwpm_free_nlmsg_request(&nlmsg_request->kref); + return ret; +} +EXPORT_SYMBOL(iwpm_register_pid); + +/* + * iwpm_add_mapping - Send a netlink add mapping message + * to the port mapper + * nlmsg attributes: + * [IWPM_NLA_MANAGE_MAPPING_SEQ] + * [IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ + struct sk_buff *skb = NULL; + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + err_str = "Invalid port mapper client"; + goto add_mapping_error; + } + if (!iwpm_registered_client(nl_client)) { + err_str = "Unregistered port mapper client"; + goto add_mapping_error; + } + if (!iwpm_valid_pid()) + return 0; + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_ADD_MAPPING, &nlh, nl_client); + if (!skb) { + err_str = "Unable to create a nlmsg"; + goto add_mapping_error; + } + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, nl_client, GFP_KERNEL); + if (!nlmsg_request) { + err_str = "Unable to allocate netlink request"; + goto add_mapping_error; + } + msg_seq = atomic_read(&echo_nlmsg_seq); + /* fill in the add mapping message */ + err_str = "Unable to put attribute of the nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, + IWPM_NLA_MANAGE_MAPPING_SEQ); + if (ret) + goto add_mapping_error; + ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), + &pm_msg->loc_addr, IWPM_NLA_MANAGE_ADDR); + if (ret) + goto add_mapping_error; + nlmsg_request->req_buffer = pm_msg; + + ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + if (ret) { + skb = NULL; /* skb is freed in the netlink send-op handling */ + iwpm_user_pid = IWPM_PID_UNDEFINED; + err_str = "Unable to send a nlmsg"; + goto add_mapping_error; + } + ret = iwpm_wait_complete_req(nlmsg_request); + return ret; +add_mapping_error: + pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); + if (skb) + dev_kfree_skb(skb); + if (nlmsg_request) + iwpm_free_nlmsg_request(&nlmsg_request->kref); + return ret; +} +EXPORT_SYMBOL(iwpm_add_mapping); + +/* + * iwpm_add_and_query_mapping - Send a netlink add and query + * mapping message to the port mapper + * nlmsg attributes: + * [IWPM_NLA_QUERY_MAPPING_SEQ] + * [IWPM_NLA_QUERY_LOCAL_ADDR] + * [IWPM_NLA_QUERY_REMOTE_ADDR] + */ +int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client) +{ + struct sk_buff *skb = NULL; + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + err_str = "Invalid port mapper client"; + goto query_mapping_error; + } + if (!iwpm_registered_client(nl_client)) { + err_str = "Unregistered port mapper client"; + goto query_mapping_error; + } + if (!iwpm_valid_pid()) + return 0; + ret = -ENOMEM; + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_QUERY_MAPPING, &nlh, nl_client); + if (!skb) { + err_str = "Unable to create a nlmsg"; + goto query_mapping_error; + } + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + nlmsg_request = iwpm_get_nlmsg_request(nlh->nlmsg_seq, + nl_client, GFP_KERNEL); + if (!nlmsg_request) { + err_str = "Unable to allocate netlink request"; + goto query_mapping_error; + } + msg_seq = atomic_read(&echo_nlmsg_seq); + + /* fill in the query message */ + err_str = "Unable to put attribute of the nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, + IWPM_NLA_QUERY_MAPPING_SEQ); + if (ret) + goto query_mapping_error; + ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), + &pm_msg->loc_addr, IWPM_NLA_QUERY_LOCAL_ADDR); + if (ret) + goto query_mapping_error; + ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), + &pm_msg->rem_addr, IWPM_NLA_QUERY_REMOTE_ADDR); + if (ret) + goto query_mapping_error; + nlmsg_request->req_buffer = pm_msg; + + ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + if (ret) { + skb = NULL; /* skb is freed in the netlink send-op handling */ + err_str = "Unable to send a nlmsg"; + goto query_mapping_error; + } + ret = iwpm_wait_complete_req(nlmsg_request); + return ret; +query_mapping_error: + pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); + if (skb) + dev_kfree_skb(skb); + if (nlmsg_request) + iwpm_free_nlmsg_request(&nlmsg_request->kref); + return ret; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping); + +/* + * iwpm_remove_mapping - Send a netlink remove mapping message + * to the port mapper + * nlmsg attributes: + * [IWPM_NLA_MANAGE_MAPPING_SEQ] + * [IWPM_NLA_MANAGE_ADDR] + */ +int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + if (!iwpm_valid_client(nl_client)) { + err_str = "Invalid port mapper client"; + goto remove_mapping_error; + } + if (!iwpm_registered_client(nl_client)) { + err_str = "Unregistered port mapper client"; + goto remove_mapping_error; + } + if (!iwpm_valid_pid()) + return 0; + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_REMOVE_MAPPING, &nlh, nl_client); + if (!skb) { + ret = -ENOMEM; + err_str = "Unable to create a nlmsg"; + goto remove_mapping_error; + } + msg_seq = atomic_read(&echo_nlmsg_seq); + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + err_str = "Unable to put attribute of the nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, + IWPM_NLA_MANAGE_MAPPING_SEQ); + if (ret) + goto remove_mapping_error; + ret = ibnl_put_attr(skb, nlh, sizeof(struct sockaddr_storage), + local_addr, IWPM_NLA_MANAGE_ADDR); + if (ret) + goto remove_mapping_error; + + ret = ibnl_unicast(skb, nlh, iwpm_user_pid); + if (ret) { + skb = NULL; /* skb is freed in the netlink send-op handling */ + iwpm_user_pid = IWPM_PID_UNDEFINED; + err_str = "Unable to send a nlmsg"; + goto remove_mapping_error; + } + iwpm_print_sockaddr(local_addr, + "remove_mapping: Local sockaddr:"); + return 0; +remove_mapping_error: + pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client); + if (skb) + dev_kfree_skb_any(skb); + return ret; +} +EXPORT_SYMBOL(iwpm_remove_mapping); + +/* netlink attribute policy for the received response to register pid request */ +static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = { + [IWPM_NLA_RREG_PID_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_RREG_IBDEV_NAME] = { .type = NLA_STRING, + .len = IWPM_DEVNAME_SIZE - 1 }, + [IWPM_NLA_RREG_ULIB_NAME] = { .type = NLA_STRING, + .len = IWPM_ULIBNAME_SIZE - 1 }, + [IWPM_NLA_RREG_ULIB_VER] = { .type = NLA_U16 }, + [IWPM_NLA_RREG_PID_ERR] = { .type = NLA_U16 } +}; + +/* + * iwpm_register_pid_cb - Process a port mapper response to + * iwpm_register_pid() + */ +int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlattr *nltb[IWPM_NLA_RREG_PID_MAX]; + struct iwpm_dev_data *pm_msg; + char *dev_name, *iwpm_name; + u32 msg_seq; + u8 nl_client; + u16 iwpm_version; + const char *msg_type = "Register Pid response"; + + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RREG_PID_MAX, + resp_reg_policy, nltb, msg_type)) + return -EINVAL; + + msg_seq = nla_get_u32(nltb[IWPM_NLA_RREG_PID_SEQ]); + nlmsg_request = iwpm_find_nlmsg_request(msg_seq); + if (!nlmsg_request) { + pr_info("%s: Could not find a matching request (seq = %u)\n", + __func__, msg_seq); + return -EINVAL; + } + pm_msg = nlmsg_request->req_buffer; + nl_client = nlmsg_request->nl_client; + dev_name = (char *)nla_data(nltb[IWPM_NLA_RREG_IBDEV_NAME]); + iwpm_name = (char *)nla_data(nltb[IWPM_NLA_RREG_ULIB_NAME]); + iwpm_version = nla_get_u16(nltb[IWPM_NLA_RREG_ULIB_VER]); + + /* check device name, ulib name and version */ + if (strcmp(pm_msg->dev_name, dev_name) || + strcmp(iwpm_ulib_name, iwpm_name) || + iwpm_version != iwpm_ulib_version) { + + pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n", + __func__, dev_name, iwpm_name, iwpm_version); + nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; + goto register_pid_response_exit; + } + iwpm_user_pid = cb->nlh->nlmsg_pid; + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", + __func__, iwpm_user_pid); + if (iwpm_valid_client(nl_client)) + iwpm_set_registered(nl_client, 1); +register_pid_response_exit: + nlmsg_request->request_done = 1; + /* always for found nlmsg_request */ + kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); + barrier(); + wake_up(&nlmsg_request->waitq); + return 0; +} +EXPORT_SYMBOL(iwpm_register_pid_cb); + +/* netlink attribute policy for the received response to add mapping request */ +static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = { + [IWPM_NLA_MANAGE_MAPPING_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_MANAGE_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RMANAGE_MAPPING_ERR] = { .type = NLA_U16 } +}; + +/* + * iwpm_add_mapping_cb - Process a port mapper response to + * iwpm_add_mapping() + */ +int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct iwpm_sa_data *pm_msg; + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlattr *nltb[IWPM_NLA_RMANAGE_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr; + struct sockaddr_storage *mapped_sockaddr; + const char *msg_type; + u32 msg_seq; + + msg_type = "Add Mapping response"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RMANAGE_MAPPING_MAX, + resp_add_policy, nltb, msg_type)) + return -EINVAL; + + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]); + nlmsg_request = iwpm_find_nlmsg_request(msg_seq); + if (!nlmsg_request) { + pr_info("%s: Could not find a matching request (seq = %u)\n", + __func__, msg_seq); + return -EINVAL; + } + pm_msg = nlmsg_request->req_buffer; + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_MANAGE_ADDR]); + mapped_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]); + + if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) { + nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; + goto add_mapping_response_exit; + } + if (mapped_sockaddr->ss_family != local_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; + goto add_mapping_response_exit; + } + memcpy(&pm_msg->mapped_loc_addr, mapped_sockaddr, + sizeof(*mapped_sockaddr)); + iwpm_print_sockaddr(&pm_msg->loc_addr, + "add_mapping: Local sockaddr:"); + iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, + "add_mapping: Mapped local sockaddr:"); + +add_mapping_response_exit: + nlmsg_request->request_done = 1; + /* always for found request */ + kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); + barrier(); + wake_up(&nlmsg_request->waitq); + return 0; +} +EXPORT_SYMBOL(iwpm_add_mapping_cb); + +/* netlink attribute policy for the response to add and query mapping request */ +static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = { + [IWPM_NLA_QUERY_MAPPING_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_QUERY_LOCAL_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_QUERY_REMOTE_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) }, + [IWPM_NLA_RQUERY_MAPPING_ERR] = { .type = NLA_U16 } +}; + +/* + * iwpm_add_and_query_mapping_cb - Process a port mapper response to + * iwpm_add_and_query_mapping() + */ +int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct iwpm_sa_data *pm_msg; + struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct nlattr *nltb[IWPM_NLA_RQUERY_MAPPING_MAX]; + struct sockaddr_storage *local_sockaddr, *remote_sockaddr; + struct sockaddr_storage *mapped_loc_sockaddr, *mapped_rem_sockaddr; + const char *msg_type; + u32 msg_seq; + u16 err_code; + + msg_type = "Query Mapping response"; + if (iwpm_parse_nlmsg(cb, IWPM_NLA_RQUERY_MAPPING_MAX, + resp_query_policy, nltb, msg_type)) + return -EINVAL; + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + + msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]); + nlmsg_request = iwpm_find_nlmsg_request(msg_seq); + if (!nlmsg_request) { + pr_info("%s: Could not find a matching request (seq = %u)\n", + __func__, msg_seq); + return -EINVAL; + } + pm_msg = nlmsg_request->req_buffer; + local_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]); + remote_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]); + mapped_loc_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]); + mapped_rem_sockaddr = (struct sockaddr_storage *) + nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_REM_ADDR]); + + err_code = nla_get_u16(nltb[IWPM_NLA_RQUERY_MAPPING_ERR]); + if (err_code == IWPM_REMOTE_QUERY_REJECT) { + pr_info("%s: Received a Reject (pid = %u, echo seq = %u)\n", + __func__, cb->nlh->nlmsg_pid, msg_seq); + nlmsg_request->err_code = IWPM_REMOTE_QUERY_REJECT; + } + if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr) || + iwpm_compare_sockaddr(remote_sockaddr, &pm_msg->rem_addr)) { + pr_info("%s: Incorrect local sockaddr\n", __func__); + nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; + goto query_mapping_response_exit; + } + if (mapped_loc_sockaddr->ss_family != local_sockaddr->ss_family || + mapped_rem_sockaddr->ss_family != remote_sockaddr->ss_family) { + pr_info("%s: Sockaddr family doesn't match the requested one\n", + __func__); + nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR; + goto query_mapping_response_exit; + } + memcpy(&pm_msg->mapped_loc_addr, mapped_loc_sockaddr, + sizeof(*mapped_loc_sockaddr)); + memcpy(&pm_msg->mapped_rem_addr, mapped_rem_sockaddr, + sizeof(*mapped_rem_sockaddr)); + + iwpm_print_sockaddr(&pm_msg->loc_addr, + "query_mapping: Local sockaddr:"); + iwpm_print_sockaddr(&pm_msg->mapped_loc_addr, + "query_mapping: Mapped local sockaddr:"); + iwpm_print_sockaddr(&pm_msg->rem_addr, + "query_mapping: Remote sockaddr:"); + iwpm_print_sockaddr(&pm_msg->mapped_rem_addr, + "query_mapping: Mapped remote sockaddr:"); +query_mapping_response_exit: + nlmsg_request->request_done = 1; + /* always for found request */ + kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); + barrier(); + wake_up(&nlmsg_request->waitq); + return 0; +} +EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb); + +/* netlink attribute policy for the received request for mapping info */ +static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = { + [IWPM_NLA_MAPINFO_ULIB_NAME] = { .type = NLA_STRING, + .len = IWPM_ULIBNAME_SIZE - 1 }, + [IWPM_NLA_MAPINFO_ULIB_VER] = { .type = NLA_U16 } +}; + +/* + * iwpm_mapping_info_cb - Process a port mapper request for mapping info + */ +int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_MAPINFO_REQ_MAX]; + const char *msg_type = "Mapping Info response"; + int iwpm_pid; + u8 nl_client; + char *iwpm_name; + u16 iwpm_version; + int ret = -EINVAL; + + if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_REQ_MAX, + resp_mapinfo_policy, nltb, msg_type)) { + pr_info("%s: Unable to parse nlmsg\n", __func__); + return ret; + } + iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]); + iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]); + if (strcmp(iwpm_ulib_name, iwpm_name) || + iwpm_version != iwpm_ulib_version) { + pr_info("%s: Invalid port mapper name = %s version = %d\n", + __func__, iwpm_name, iwpm_version); + return ret; + } + nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + if (!iwpm_valid_client(nl_client)) { + pr_info("%s: Invalid port mapper client = %d\n", + __func__, nl_client); + return ret; + } + iwpm_set_registered(nl_client, 0); + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + if (!iwpm_mapinfo_available()) + return 0; + iwpm_pid = cb->nlh->nlmsg_pid; + pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n", + __func__, iwpm_pid); + ret = iwpm_send_mapinfo(nl_client, iwpm_pid); + return ret; +} +EXPORT_SYMBOL(iwpm_mapping_info_cb); + +/* netlink attribute policy for the received mapping info ack */ +static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = { + [IWPM_NLA_MAPINFO_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_MAPINFO_SEND_NUM] = { .type = NLA_U32 }, + [IWPM_NLA_MAPINFO_ACK_NUM] = { .type = NLA_U32 } +}; + +/* + * iwpm_ack_mapping_info_cb - Process a port mapper ack for + * the provided mapping info records + */ +int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nlattr *nltb[IWPM_NLA_MAPINFO_NUM_MAX]; + u32 mapinfo_send, mapinfo_ack; + const char *msg_type = "Mapping Info Ack"; + + if (iwpm_parse_nlmsg(cb, IWPM_NLA_MAPINFO_NUM_MAX, + ack_mapinfo_policy, nltb, msg_type)) + return -EINVAL; + mapinfo_send = nla_get_u32(nltb[IWPM_NLA_MAPINFO_SEND_NUM]); + mapinfo_ack = nla_get_u32(nltb[IWPM_NLA_MAPINFO_ACK_NUM]); + if (mapinfo_ack != mapinfo_send) + pr_info("%s: Invalid mapinfo number (sent = %u ack-ed = %u)\n", + __func__, mapinfo_send, mapinfo_ack); + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + return 0; +} +EXPORT_SYMBOL(iwpm_ack_mapping_info_cb); + +/* netlink attribute policy for the received port mapper error message */ +static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = { + [IWPM_NLA_ERR_SEQ] = { .type = NLA_U32 }, + [IWPM_NLA_ERR_CODE] = { .type = NLA_U16 }, +}; + +/* + * iwpm_mapping_error_cb - Process a port mapper error message + */ +int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct iwpm_nlmsg_request *nlmsg_request = NULL; + int nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type); + struct nlattr *nltb[IWPM_NLA_ERR_MAX]; + u32 msg_seq; + u16 err_code; + const char *msg_type = "Mapping Error Msg"; + + if (iwpm_parse_nlmsg(cb, IWPM_NLA_ERR_MAX, + map_error_policy, nltb, msg_type)) + return -EINVAL; + + msg_seq = nla_get_u32(nltb[IWPM_NLA_ERR_SEQ]); + err_code = nla_get_u16(nltb[IWPM_NLA_ERR_CODE]); + pr_info("%s: Received msg seq = %u err code = %u client = %d\n", + __func__, msg_seq, err_code, nl_client); + /* look for nlmsg_request */ + nlmsg_request = iwpm_find_nlmsg_request(msg_seq); + if (!nlmsg_request) { + /* not all errors have associated requests */ + pr_debug("Could not find matching req (seq = %u)\n", msg_seq); + return 0; + } + atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq); + nlmsg_request->err_code = err_code; + nlmsg_request->request_done = 1; + /* always for found request */ + kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); + barrier(); + wake_up(&nlmsg_request->waitq); + return 0; +} +EXPORT_SYMBOL(iwpm_mapping_error_cb); diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c new file mode 100644 index 00000000000..69e9f84c160 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "iwpm_util.h" + +#define IWPM_HASH_BUCKET_SIZE 512 +#define IWPM_HASH_BUCKET_MASK (IWPM_HASH_BUCKET_SIZE - 1) + +static LIST_HEAD(iwpm_nlmsg_req_list); +static DEFINE_SPINLOCK(iwpm_nlmsg_req_lock); + +static struct hlist_head *iwpm_hash_bucket; +static DEFINE_SPINLOCK(iwpm_mapinfo_lock); + +static DEFINE_MUTEX(iwpm_admin_lock); +static struct iwpm_admin_data iwpm_admin; + +int iwpm_init(u8 nl_client) +{ + if (iwpm_valid_client(nl_client)) + return -EINVAL; + mutex_lock(&iwpm_admin_lock); + if (atomic_read(&iwpm_admin.refcount) == 0) { + iwpm_hash_bucket = kzalloc(IWPM_HASH_BUCKET_SIZE * + sizeof(struct hlist_head), GFP_KERNEL); + if (!iwpm_hash_bucket) { + mutex_unlock(&iwpm_admin_lock); + pr_err("%s Unable to create mapinfo hash table\n", __func__); + return -ENOMEM; + } + } + atomic_inc(&iwpm_admin.refcount); + mutex_unlock(&iwpm_admin_lock); + iwpm_set_valid(nl_client, 1); + return 0; +} +EXPORT_SYMBOL(iwpm_init); + +static void free_hash_bucket(void); + +int iwpm_exit(u8 nl_client) +{ + + if (!iwpm_valid_client(nl_client)) + return -EINVAL; + mutex_lock(&iwpm_admin_lock); + if (atomic_read(&iwpm_admin.refcount) == 0) { + mutex_unlock(&iwpm_admin_lock); + pr_err("%s Incorrect usage - negative refcount\n", __func__); + return -EINVAL; + } + if (atomic_dec_and_test(&iwpm_admin.refcount)) { + free_hash_bucket(); + pr_debug("%s: Mapinfo hash table is destroyed\n", __func__); + } + mutex_unlock(&iwpm_admin_lock); + iwpm_set_valid(nl_client, 0); + return 0; +} +EXPORT_SYMBOL(iwpm_exit); + +static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage *, + struct sockaddr_storage *); + +int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr, + struct sockaddr_storage *mapped_sockaddr, + u8 nl_client) +{ + struct hlist_head *hash_bucket_head; + struct iwpm_mapping_info *map_info; + unsigned long flags; + + if (!iwpm_valid_client(nl_client)) + return -EINVAL; + map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL); + if (!map_info) { + pr_err("%s: Unable to allocate a mapping info\n", __func__); + return -ENOMEM; + } + memcpy(&map_info->local_sockaddr, local_sockaddr, + sizeof(struct sockaddr_storage)); + memcpy(&map_info->mapped_sockaddr, mapped_sockaddr, + sizeof(struct sockaddr_storage)); + map_info->nl_client = nl_client; + + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + if (iwpm_hash_bucket) { + hash_bucket_head = get_hash_bucket_head( + &map_info->local_sockaddr, + &map_info->mapped_sockaddr); + hlist_add_head(&map_info->hlist_node, hash_bucket_head); + } + spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); + return 0; +} +EXPORT_SYMBOL(iwpm_create_mapinfo); + +int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr, + struct sockaddr_storage *mapped_local_addr) +{ + struct hlist_node *tmp_hlist_node; + struct hlist_head *hash_bucket_head; + struct iwpm_mapping_info *map_info = NULL; + unsigned long flags; + int ret = -EINVAL; + + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + if (iwpm_hash_bucket) { + hash_bucket_head = get_hash_bucket_head( + local_sockaddr, + mapped_local_addr); + hlist_for_each_entry_safe(map_info, tmp_hlist_node, + hash_bucket_head, hlist_node) { + + if (!iwpm_compare_sockaddr(&map_info->mapped_sockaddr, + mapped_local_addr)) { + + hlist_del_init(&map_info->hlist_node); + kfree(map_info); + ret = 0; + break; + } + } + } + spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); + return ret; +} +EXPORT_SYMBOL(iwpm_remove_mapinfo); + +static void free_hash_bucket(void) +{ + struct hlist_node *tmp_hlist_node; + struct iwpm_mapping_info *map_info; + unsigned long flags; + int i; + + /* remove all the mapinfo data from the list */ + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(map_info, tmp_hlist_node, + &iwpm_hash_bucket[i], hlist_node) { + + hlist_del_init(&map_info->hlist_node); + kfree(map_info); + } + } + /* free the hash list */ + kfree(iwpm_hash_bucket); + iwpm_hash_bucket = NULL; + spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +} + +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, + u8 nl_client, gfp_t gfp) +{ + struct iwpm_nlmsg_request *nlmsg_request = NULL; + unsigned long flags; + + nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); + if (!nlmsg_request) { + pr_err("%s Unable to allocate a nlmsg_request\n", __func__); + return NULL; + } + spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); + list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list); + spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + + kref_init(&nlmsg_request->kref); + kref_get(&nlmsg_request->kref); + nlmsg_request->nlmsg_seq = nlmsg_seq; + nlmsg_request->nl_client = nl_client; + nlmsg_request->request_done = 0; + nlmsg_request->err_code = 0; + return nlmsg_request; +} + +void iwpm_free_nlmsg_request(struct kref *kref) +{ + struct iwpm_nlmsg_request *nlmsg_request; + unsigned long flags; + + nlmsg_request = container_of(kref, struct iwpm_nlmsg_request, kref); + + spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); + list_del_init(&nlmsg_request->inprocess_list); + spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + + if (!nlmsg_request->request_done) + pr_debug("%s Freeing incomplete nlmsg request (seq = %u).\n", + __func__, nlmsg_request->nlmsg_seq); + kfree(nlmsg_request); +} + +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq) +{ + struct iwpm_nlmsg_request *nlmsg_request; + struct iwpm_nlmsg_request *found_request = NULL; + unsigned long flags; + + spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags); + list_for_each_entry(nlmsg_request, &iwpm_nlmsg_req_list, + inprocess_list) { + if (nlmsg_request->nlmsg_seq == echo_seq) { + found_request = nlmsg_request; + kref_get(&nlmsg_request->kref); + break; + } + } + spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags); + return found_request; +} + +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request) +{ + int ret; + init_waitqueue_head(&nlmsg_request->waitq); + + ret = wait_event_timeout(nlmsg_request->waitq, + (nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT); + if (!ret) { + ret = -EINVAL; + pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n", + __func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq); + } else { + ret = nlmsg_request->err_code; + } + kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request); + return ret; +} + +int iwpm_get_nlmsg_seq(void) +{ + return atomic_inc_return(&iwpm_admin.nlmsg_seq); +} + +int iwpm_valid_client(u8 nl_client) +{ + if (nl_client >= RDMA_NL_NUM_CLIENTS) + return 0; + return iwpm_admin.client_list[nl_client]; +} + +void iwpm_set_valid(u8 nl_client, int valid) +{ + if (nl_client >= RDMA_NL_NUM_CLIENTS) + return; + iwpm_admin.client_list[nl_client] = valid; +} + +/* valid client */ +int iwpm_registered_client(u8 nl_client) +{ + return iwpm_admin.reg_list[nl_client]; +} + +/* valid client */ +void iwpm_set_registered(u8 nl_client, int reg) +{ + iwpm_admin.reg_list[nl_client] = reg; +} + +int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr) +{ + if (a_sockaddr->ss_family != b_sockaddr->ss_family) + return 1; + if (a_sockaddr->ss_family == AF_INET) { + struct sockaddr_in *a4_sockaddr = + (struct sockaddr_in *)a_sockaddr; + struct sockaddr_in *b4_sockaddr = + (struct sockaddr_in *)b_sockaddr; + if (!memcmp(&a4_sockaddr->sin_addr, + &b4_sockaddr->sin_addr, sizeof(struct in_addr)) + && a4_sockaddr->sin_port == b4_sockaddr->sin_port) + return 0; + + } else if (a_sockaddr->ss_family == AF_INET6) { + struct sockaddr_in6 *a6_sockaddr = + (struct sockaddr_in6 *)a_sockaddr; + struct sockaddr_in6 *b6_sockaddr = + (struct sockaddr_in6 *)b_sockaddr; + if (!memcmp(&a6_sockaddr->sin6_addr, + &b6_sockaddr->sin6_addr, sizeof(struct in6_addr)) + && a6_sockaddr->sin6_port == b6_sockaddr->sin6_port) + return 0; + + } else { + pr_err("%s: Invalid sockaddr family\n", __func__); + } + return 1; +} + +struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, + int nl_client) +{ + struct sk_buff *skb = NULL; + + skb = dev_alloc_skb(NLMSG_GOODSIZE); + if (!skb) { + pr_err("%s Unable to allocate skb\n", __func__); + goto create_nlmsg_exit; + } + if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op, + NLM_F_REQUEST))) { + pr_warn("%s: Unable to put the nlmsg header\n", __func__); + dev_kfree_skb(skb); + skb = NULL; + } +create_nlmsg_exit: + return skb; +} + +int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, + const struct nla_policy *nlmsg_policy, + struct nlattr *nltb[], const char *msg_type) +{ + int nlh_len = 0; + int ret; + const char *err_str = ""; + + ret = nlmsg_validate(cb->nlh, nlh_len, policy_max-1, nlmsg_policy); + if (ret) { + err_str = "Invalid attribute"; + goto parse_nlmsg_error; + } + ret = nlmsg_parse(cb->nlh, nlh_len, nltb, policy_max-1, nlmsg_policy); + if (ret) { + err_str = "Unable to parse the nlmsg"; + goto parse_nlmsg_error; + } + ret = iwpm_validate_nlmsg_attr(nltb, policy_max); + if (ret) { + err_str = "Invalid NULL attribute"; + goto parse_nlmsg_error; + } + return 0; +parse_nlmsg_error: + pr_warn("%s: %s (msg type %s ret = %d)\n", + __func__, err_str, msg_type, ret); + return ret; +} + +void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg) +{ + struct sockaddr_in6 *sockaddr_v6; + struct sockaddr_in *sockaddr_v4; + + switch (sockaddr->ss_family) { + case AF_INET: + sockaddr_v4 = (struct sockaddr_in *)sockaddr; + pr_debug("%s IPV4 %pI4: %u(0x%04X)\n", + msg, &sockaddr_v4->sin_addr, + ntohs(sockaddr_v4->sin_port), + ntohs(sockaddr_v4->sin_port)); + break; + case AF_INET6: + sockaddr_v6 = (struct sockaddr_in6 *)sockaddr; + pr_debug("%s IPV6 %pI6: %u(0x%04X)\n", + msg, &sockaddr_v6->sin6_addr, + ntohs(sockaddr_v6->sin6_port), + ntohs(sockaddr_v6->sin6_port)); + break; + default: + break; + } +} + +static u32 iwpm_ipv6_jhash(struct sockaddr_in6 *ipv6_sockaddr) +{ + u32 ipv6_hash = jhash(&ipv6_sockaddr->sin6_addr, sizeof(struct in6_addr), 0); + u32 hash = jhash_2words(ipv6_hash, (__force u32) ipv6_sockaddr->sin6_port, 0); + return hash; +} + +static u32 iwpm_ipv4_jhash(struct sockaddr_in *ipv4_sockaddr) +{ + u32 ipv4_hash = jhash(&ipv4_sockaddr->sin_addr, sizeof(struct in_addr), 0); + u32 hash = jhash_2words(ipv4_hash, (__force u32) ipv4_sockaddr->sin_port, 0); + return hash; +} + +static struct hlist_head *get_hash_bucket_head(struct sockaddr_storage + *local_sockaddr, + struct sockaddr_storage + *mapped_sockaddr) +{ + u32 local_hash, mapped_hash, hash; + + if (local_sockaddr->ss_family == AF_INET) { + local_hash = iwpm_ipv4_jhash((struct sockaddr_in *) local_sockaddr); + mapped_hash = iwpm_ipv4_jhash((struct sockaddr_in *) mapped_sockaddr); + + } else if (local_sockaddr->ss_family == AF_INET6) { + local_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) local_sockaddr); + mapped_hash = iwpm_ipv6_jhash((struct sockaddr_in6 *) mapped_sockaddr); + } else { + pr_err("%s: Invalid sockaddr family\n", __func__); + return NULL; + } + + if (local_hash == mapped_hash) /* if port mapper isn't available */ + hash = local_hash; + else + hash = jhash_2words(local_hash, mapped_hash, 0); + + return &iwpm_hash_bucket[hash & IWPM_HASH_BUCKET_MASK]; +} + +static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid) +{ + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + u32 msg_seq; + const char *err_str = ""; + int ret = -EINVAL; + + skb = iwpm_create_nlmsg(RDMA_NL_IWPM_MAPINFO_NUM, &nlh, nl_client); + if (!skb) { + err_str = "Unable to create a nlmsg"; + goto mapinfo_num_error; + } + nlh->nlmsg_seq = iwpm_get_nlmsg_seq(); + msg_seq = 0; + err_str = "Unable to put attribute of mapinfo number nlmsg"; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_MAPINFO_SEQ); + if (ret) + goto mapinfo_num_error; + ret = ibnl_put_attr(skb, nlh, sizeof(u32), + &mapping_num, IWPM_NLA_MAPINFO_SEND_NUM); + if (ret) + goto mapinfo_num_error; + ret = ibnl_unicast(skb, nlh, iwpm_pid); + if (ret) { + skb = NULL; + err_str = "Unable to send a nlmsg"; + goto mapinfo_num_error; + } + pr_debug("%s: Sent mapping number = %d\n", __func__, mapping_num); + return 0; +mapinfo_num_error: + pr_info("%s: %s\n", __func__, err_str); + if (skb) + dev_kfree_skb(skb); + return ret; +} + +static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid) +{ + struct nlmsghdr *nlh = NULL; + int ret = 0; + + if (!skb) + return ret; + if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, + RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { + pr_warn("%s Unable to put NLMSG_DONE\n", __func__); + return -ENOMEM; + } + nlh->nlmsg_type = NLMSG_DONE; + ret = ibnl_unicast(skb, (struct nlmsghdr *)skb->data, iwpm_pid); + if (ret) + pr_warn("%s Unable to send a nlmsg\n", __func__); + return ret; +} + +int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid) +{ + struct iwpm_mapping_info *map_info; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh; + int skb_num = 0, mapping_num = 0; + int i = 0, nlmsg_bytes = 0; + unsigned long flags; + const char *err_str = ""; + int ret; + + skb = dev_alloc_skb(NLMSG_GOODSIZE); + if (!skb) { + ret = -ENOMEM; + err_str = "Unable to allocate skb"; + goto send_mapping_info_exit; + } + skb_num++; + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + hlist_for_each_entry(map_info, &iwpm_hash_bucket[i], + hlist_node) { + if (map_info->nl_client != nl_client) + continue; + nlh = NULL; + if (!(ibnl_put_msg(skb, &nlh, 0, 0, nl_client, + RDMA_NL_IWPM_MAPINFO, NLM_F_MULTI))) { + ret = -ENOMEM; + err_str = "Unable to put the nlmsg header"; + goto send_mapping_info_unlock; + } + err_str = "Unable to put attribute of the nlmsg"; + ret = ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_storage), + &map_info->local_sockaddr, + IWPM_NLA_MAPINFO_LOCAL_ADDR); + if (ret) + goto send_mapping_info_unlock; + + ret = ibnl_put_attr(skb, nlh, + sizeof(struct sockaddr_storage), + &map_info->mapped_sockaddr, + IWPM_NLA_MAPINFO_MAPPED_ADDR); + if (ret) + goto send_mapping_info_unlock; + + iwpm_print_sockaddr(&map_info->local_sockaddr, + "send_mapping_info: Local sockaddr:"); + iwpm_print_sockaddr(&map_info->mapped_sockaddr, + "send_mapping_info: Mapped local sockaddr:"); + mapping_num++; + nlmsg_bytes += nlh->nlmsg_len; + + /* check if all mappings can fit in one skb */ + if (NLMSG_GOODSIZE - nlmsg_bytes < nlh->nlmsg_len * 2) { + /* and leave room for NLMSG_DONE */ + nlmsg_bytes = 0; + skb_num++; + spin_unlock_irqrestore(&iwpm_mapinfo_lock, + flags); + /* send the skb */ + ret = send_nlmsg_done(skb, nl_client, iwpm_pid); + skb = NULL; + if (ret) { + err_str = "Unable to send map info"; + goto send_mapping_info_exit; + } + if (skb_num == IWPM_MAPINFO_SKB_COUNT) { + ret = -ENOMEM; + err_str = "Insufficient skbs for map info"; + goto send_mapping_info_exit; + } + skb = dev_alloc_skb(NLMSG_GOODSIZE); + if (!skb) { + ret = -ENOMEM; + err_str = "Unable to allocate skb"; + goto send_mapping_info_exit; + } + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + } + } + } +send_mapping_info_unlock: + spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); +send_mapping_info_exit: + if (ret) { + pr_warn("%s: %s (ret = %d)\n", __func__, err_str, ret); + if (skb) + dev_kfree_skb(skb); + return ret; + } + send_nlmsg_done(skb, nl_client, iwpm_pid); + return send_mapinfo_num(mapping_num, nl_client, iwpm_pid); +} + +int iwpm_mapinfo_available(void) +{ + unsigned long flags; + int full_bucket = 0, i = 0; + + spin_lock_irqsave(&iwpm_mapinfo_lock, flags); + if (iwpm_hash_bucket) { + for (i = 0; i < IWPM_HASH_BUCKET_SIZE; i++) { + if (!hlist_empty(&iwpm_hash_bucket[i])) { + full_bucket = 1; + break; + } + } + } + spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags); + return full_bucket; +} diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h new file mode 100644 index 00000000000..9777c869a14 --- /dev/null +++ b/drivers/infiniband/core/iwpm_util.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _IWPM_UTIL_H +#define _IWPM_UTIL_H + +#include <linux/module.h> +#include <linux/io.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/spinlock.h> +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/delay.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/jhash.h> +#include <linux/kref.h> +#include <net/netlink.h> +#include <linux/errno.h> +#include <rdma/iw_portmap.h> +#include <rdma/rdma_netlink.h> + + +#define IWPM_NL_RETRANS 3 +#define IWPM_NL_TIMEOUT (10*HZ) +#define IWPM_MAPINFO_SKB_COUNT 20 + +#define IWPM_PID_UNDEFINED -1 +#define IWPM_PID_UNAVAILABLE -2 + +struct iwpm_nlmsg_request { + struct list_head inprocess_list; + __u32 nlmsg_seq; + void *req_buffer; + u8 nl_client; + u8 request_done; + u16 err_code; + wait_queue_head_t waitq; + struct kref kref; +}; + +struct iwpm_mapping_info { + struct hlist_node hlist_node; + struct sockaddr_storage local_sockaddr; + struct sockaddr_storage mapped_sockaddr; + u8 nl_client; +}; + +struct iwpm_admin_data { + atomic_t refcount; + atomic_t nlmsg_seq; + int client_list[RDMA_NL_NUM_CLIENTS]; + int reg_list[RDMA_NL_NUM_CLIENTS]; +}; + +/** + * iwpm_get_nlmsg_request - Allocate and initialize netlink message request + * @nlmsg_seq: Sequence number of the netlink message + * @nl_client: The index of the netlink client + * @gfp: Indicates how the memory for the request should be allocated + * + * Returns the newly allocated netlink request object if successful, + * otherwise returns NULL + */ +struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, + u8 nl_client, gfp_t gfp); + +/** + * iwpm_free_nlmsg_request - Deallocate netlink message request + * @kref: Holds reference of netlink message request + */ +void iwpm_free_nlmsg_request(struct kref *kref); + +/** + * iwpm_find_nlmsg_request - Find netlink message request in the request list + * @echo_seq: Sequence number of the netlink request to find + * + * Returns the found netlink message request, + * if not found, returns NULL + */ +struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq); + +/** + * iwpm_wait_complete_req - Block while servicing the netlink request + * @nlmsg_request: Netlink message request to service + * + * Wakes up, after the request is completed or expired + * Returns 0 if the request is complete without error + */ +int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request); + +/** + * iwpm_get_nlmsg_seq - Get the sequence number for a netlink + * message to send to the port mapper + * + * Returns the sequence number for the netlink message. + */ +int iwpm_get_nlmsg_seq(void); + +/** + * iwpm_valid_client - Check if the port mapper client is valid + * @nl_client: The index of the netlink client + * + * Valid clients need to call iwpm_init() before using + * the port mapper + */ +int iwpm_valid_client(u8 nl_client); + +/** + * iwpm_set_valid - Set the port mapper client to valid or not + * @nl_client: The index of the netlink client + * @valid: 1 if valid or 0 if invalid + */ +void iwpm_set_valid(u8 nl_client, int valid); + +/** + * iwpm_registered_client - Check if the port mapper client is registered + * @nl_client: The index of the netlink client + * + * Call iwpm_register_pid() to register a client + */ +int iwpm_registered_client(u8 nl_client); + +/** + * iwpm_set_registered - Set the port mapper client to registered or not + * @nl_client: The index of the netlink client + * @reg: 1 if registered or 0 if not + */ +void iwpm_set_registered(u8 nl_client, int reg); + +/** + * iwpm_send_mapinfo - Send local and mapped IPv4/IPv6 address info of + * a client to the user space port mapper + * @nl_client: The index of the netlink client + * @iwpm_pid: The pid of the user space port mapper + * + * If successful, returns the number of sent mapping info records + */ +int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid); + +/** + * iwpm_mapinfo_available - Check if any mapping info records is available + * in the hash table + * + * Returns 1 if mapping information is available, otherwise returns 0 + */ +int iwpm_mapinfo_available(void); + +/** + * iwpm_compare_sockaddr - Compare two sockaddr storage structs + * + * Returns 0 if they are holding the same ip/tcp address info, + * otherwise returns 1 + */ +int iwpm_compare_sockaddr(struct sockaddr_storage *a_sockaddr, + struct sockaddr_storage *b_sockaddr); + +/** + * iwpm_validate_nlmsg_attr - Check for NULL netlink attributes + * @nltb: Holds address of each netlink message attributes + * @nla_count: Number of netlink message attributes + * + * Returns error if any of the nla_count attributes is NULL + */ +static inline int iwpm_validate_nlmsg_attr(struct nlattr *nltb[], + int nla_count) +{ + int i; + for (i = 1; i < nla_count; i++) { + if (!nltb[i]) + return -EINVAL; + } + return 0; +} + +/** + * iwpm_create_nlmsg - Allocate skb and form a netlink message + * @nl_op: Netlink message opcode + * @nlh: Holds address of the netlink message header in skb + * @nl_client: The index of the netlink client + * + * Returns the newly allcated skb, or NULL if the tailroom of the skb + * is insufficient to store the message header and payload + */ +struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh, + int nl_client); + +/** + * iwpm_parse_nlmsg - Validate and parse the received netlink message + * @cb: Netlink callback structure + * @policy_max: Maximum attribute type to be expected + * @nlmsg_policy: Validation policy + * @nltb: Array to store policy_max parsed elements + * @msg_type: Type of netlink message + * + * Returns 0 on success or a negative error code + */ +int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max, + const struct nla_policy *nlmsg_policy, + struct nlattr *nltb[], const char *msg_type); + +/** + * iwpm_print_sockaddr - Print IPv4/IPv6 address and TCP port + * @sockaddr: Socket address to print + * @msg: Message to print + */ +void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg); +#endif diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2fe428bba54..ab31f136d04 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1022,12 +1022,21 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr) mad_send_wr->send_buf.mad, sge[0].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr))) + return -ENOMEM; + mad_send_wr->header_mapping = sge[0].addr; sge[1].addr = ib_dma_map_single(mad_agent->device, ib_get_payload(mad_send_wr), sge[1].length, DMA_TO_DEVICE); + if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) { + ib_dma_unmap_single(mad_agent->device, + mad_send_wr->header_mapping, + sge[0].length, DMA_TO_DEVICE); + return -ENOMEM; + } mad_send_wr->payload_mapping = sge[1].addr; spin_lock_irqsave(&qp_info->send_queue.lock, flags); @@ -1842,6 +1851,26 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } +static bool generate_unmatched_resp(struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || + recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, sizeof *response); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + response->mad.mad.mad_hdr.status = + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) + response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION; + + return true; + } else { + return false; + } +} static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1851,6 +1880,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_mad_list_head *mad_list; struct ib_mad_agent_private *mad_agent; int port_num; + int ret = IB_MAD_RESULT_SUCCESS; mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; qp_info = mad_list->mad_queue->qp_info; @@ -1934,8 +1964,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, local: /* Give driver "right of first refusal" on incoming MAD */ if (port_priv->device->process_mad) { - int ret; - ret = port_priv->device->process_mad(port_priv->device, 0, port_priv->port_num, wc, &recv->grh, @@ -1963,6 +1991,10 @@ local: * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; + } else if ((ret & IB_MAD_RESULT_SUCCESS) && + generate_unmatched_resp(recv, response)) { + agent_send_response(&response->mad.mad, &recv->grh, wc, + port_priv->device, port_num, qp_info->qp->qp_num); } out: @@ -1981,7 +2013,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) unsigned long delay; if (list_empty(&mad_agent_priv->wait_list)) { - __cancel_delayed_work(&mad_agent_priv->timed_work); + cancel_delayed_work(&mad_agent_priv->timed_work); } else { mad_send_wr = list_entry(mad_agent_priv->wait_list.next, struct ib_mad_send_wr_private, @@ -1990,13 +2022,11 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv) if (time_after(mad_agent_priv->timeout, mad_send_wr->timeout)) { mad_agent_priv->timeout = mad_send_wr->timeout; - __cancel_delayed_work(&mad_agent_priv->timed_work); delay = mad_send_wr->timeout - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(mad_agent_priv->qp_info-> - port_priv->wq, - &mad_agent_priv->timed_work, delay); + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } } } @@ -2029,11 +2059,9 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr) list_add(&mad_send_wr->agent_list, list_item); /* Reschedule a work item if we have a shorter timeout */ - if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) { - __cancel_delayed_work(&mad_agent_priv->timed_work); - queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq, - &mad_agent_priv->timed_work, delay); - } + if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) + mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, + &mad_agent_priv->timed_work, delay); } void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, @@ -2571,6 +2599,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info, sizeof *mad_priv - sizeof mad_priv->header, DMA_FROM_DEVICE); + if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device, + sg_list.addr))) { + ret = -ENOMEM; + break; + } mad_priv->header.mapping = sg_list.addr; recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list; mad_priv->header.mad_list.mad_queue = recv_queue; @@ -2644,6 +2677,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) int ret, i; struct ib_qp_attr *attr; struct ib_qp *qp; + u16 pkey_index; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) { @@ -2651,6 +2685,11 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) return -ENOMEM; } + ret = ib_find_pkey(port_priv->device, port_priv->port_num, + IB_DEFAULT_PKEY_FULL, &pkey_index); + if (ret) + pkey_index = 0; + for (i = 0; i < IB_MAD_QPS_CORE; i++) { qp = port_priv->qp_info[i].qp; if (!qp) @@ -2661,7 +2700,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv) * one is needed for the Reset to Init transition */ attr->qp_state = IB_QPS_INIT; - attr->pkey_index = 0; + attr->pkey_index = pkey_index; attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY; ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index d1c8196d15d..23dd5a5c759 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -103,17 +103,19 @@ int ibnl_remove_client(int index) EXPORT_SYMBOL(ibnl_remove_client); void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, - int len, int client, int op) + int len, int client, int op, int flags) { unsigned char *prev_tail; prev_tail = skb_tail_pointer(skb); - *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), - len, NLM_F_MULTI); + *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), + len, flags); + if (!*nlh) + goto out_nlmsg_trim; (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; - return NLMSG_DATA(*nlh); + return nlmsg_data(*nlh); -nlmsg_failure: +out_nlmsg_trim: nlmsg_trim(skb, prev_tail); return NULL; } @@ -125,7 +127,8 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, unsigned char *prev_tail; prev_tail = skb_tail_pointer(skb); - NLA_PUT(skb, type, len, data); + if (nla_put(skb, type, len, data)) + goto nla_put_failure; nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; return 0; @@ -145,11 +148,16 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) list_for_each_entry(client, &client_list, list) { if (client->index == index) { if (op < 0 || op >= client->nops || - !client->cb_table[RDMA_NL_GET_OP(op)].dump) + !client->cb_table[op].dump) return -EINVAL; - return netlink_dump_start(nls, skb, nlh, - client->cb_table[op].dump, - NULL, 0); + + { + struct netlink_dump_control c = { + .dump = client->cb_table[op].dump, + .module = client->cb_table[op].module, + }; + return netlink_dump_start(nls, skb, nlh, &c); + } } } @@ -164,10 +172,27 @@ static void ibnl_rcv(struct sk_buff *skb) mutex_unlock(&ibnl_mutex); } +int ibnl_unicast(struct sk_buff *skb, struct nlmsghdr *nlh, + __u32 pid) +{ + return nlmsg_unicast(nls, skb, pid); +} +EXPORT_SYMBOL(ibnl_unicast); + +int ibnl_multicast(struct sk_buff *skb, struct nlmsghdr *nlh, + unsigned int group, gfp_t flags) +{ + return nlmsg_multicast(nls, skb, 0, group, flags); +} +EXPORT_SYMBOL(ibnl_multicast); + int __init ibnl_init(void) { - nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv, - NULL, THIS_MODULE); + struct netlink_kernel_cfg cfg = { + .input = ibnl_rcv, + }; + + nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg); if (!nls) { pr_warn("Failed to create netlink socket\n"); return -ENOMEM; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index fbbfa24cf57..233eaf541f5 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -42,7 +42,7 @@ #include <linux/kref.h> #include <linux/idr.h> #include <linux/workqueue.h> - +#include <uapi/linux/if_ether.h> #include <rdma/ib_pack.h> #include <rdma/ib_cache.h> #include "sa.h" @@ -94,6 +94,12 @@ struct ib_sa_path_query { struct ib_sa_query sa_query; }; +struct ib_sa_guidinfo_query { + void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); + void *context; + struct ib_sa_query sa_query; +}; + struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; @@ -347,6 +353,34 @@ static const struct ib_field service_rec_table[] = { .size_bits = 2*64 }, }; +#define GUIDINFO_REC_FIELD(field) \ + .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ + .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \ + .field_name = "sa_guidinfo_rec:" #field + +static const struct ib_field guidinfo_rec_table[] = { + { GUIDINFO_REC_FIELD(lid), + .offset_words = 0, + .offset_bits = 0, + .size_bits = 16 }, + { GUIDINFO_REC_FIELD(block_num), + .offset_words = 0, + .offset_bits = 16, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res1), + .offset_words = 0, + .offset_bits = 24, + .size_bits = 8 }, + { GUIDINFO_REC_FIELD(res2), + .offset_words = 1, + .offset_bits = 0, + .size_bits = 32 }, + { GUIDINFO_REC_FIELD(guid_info_list), + .offset_words = 2, + .offset_bits = 0, + .size_bits = 512 }, +}; + static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); @@ -522,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, ah_attr->grh.hop_limit = rec->hop_limit; ah_attr->grh.traffic_class = rec->traffic_class; } + if (force_grh) { + memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN); + ah_attr->vlan_id = rec->vlan_id; + } else { + ah_attr->vlan_id = 0xffff; + } + return 0; } EXPORT_SYMBOL(ib_init_ah_from_path); @@ -577,19 +618,21 @@ static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent) static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask) { + bool preload = !!(gfp_mask & __GFP_WAIT); unsigned long flags; int ret, id; -retry: - if (!idr_pre_get(&query_idr, gfp_mask)) - return -ENOMEM; + if (preload) + idr_preload(gfp_mask); spin_lock_irqsave(&idr_lock, flags); - ret = idr_get_new(&query_idr, query, &id); + + id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT); + spin_unlock_irqrestore(&idr_lock, flags); - if (ret == -EAGAIN) - goto retry; - if (ret) - return ret; + if (preload) + idr_preload_end(); + if (id < 0) + return id; query->mad_buf->timeout_ms = timeout_ms; query->mad_buf->context[0] = query; @@ -616,6 +659,12 @@ void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec) } EXPORT_SYMBOL(ib_sa_unpack_path); +void ib_sa_pack_path(struct ib_sa_path_rec *rec, void *attribute) +{ + ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); +} +EXPORT_SYMBOL(ib_sa_pack_path); + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) @@ -628,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); + rec.vlan_id = 0xffff; + memset(rec.dmac, 0, ETH_ALEN); + memset(rec.smac, 0, ETH_ALEN); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); @@ -945,6 +997,105 @@ err1: return ret; } +/* Support GuidInfoRecord */ +static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, + int status, + struct ib_sa_mad *mad) +{ + struct ib_sa_guidinfo_query *query = + container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); + + if (mad) { + struct ib_sa_guidinfo_rec rec; + + ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), + mad->data, &rec); + query->callback(status, &rec, query->context); + } else + query->callback(status, NULL, query->context); +} + +static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) +{ + kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); +} + +int ib_sa_guid_info_rec_query(struct ib_sa_client *client, + struct ib_device *device, u8 port_num, + struct ib_sa_guidinfo_rec *rec, + ib_sa_comp_mask comp_mask, u8 method, + int timeout_ms, gfp_t gfp_mask, + void (*callback)(int status, + struct ib_sa_guidinfo_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query) +{ + struct ib_sa_guidinfo_query *query; + struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); + struct ib_sa_port *port; + struct ib_mad_agent *agent; + struct ib_sa_mad *mad; + int ret; + + if (!sa_dev) + return -ENODEV; + + if (method != IB_MGMT_METHOD_GET && + method != IB_MGMT_METHOD_SET && + method != IB_SA_METHOD_DELETE) { + return -EINVAL; + } + + port = &sa_dev->port[port_num - sa_dev->start_port]; + agent = port->agent; + + query = kmalloc(sizeof *query, gfp_mask); + if (!query) + return -ENOMEM; + + query->sa_query.port = port; + ret = alloc_mad(&query->sa_query, gfp_mask); + if (ret) + goto err1; + + ib_sa_client_get(client); + query->sa_query.client = client; + query->callback = callback; + query->context = context; + + mad = query->sa_query.mad_buf->mad; + init_mad(mad, agent); + + query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; + query->sa_query.release = ib_sa_guidinfo_rec_release; + + mad->mad_hdr.method = method; + mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); + mad->sa_hdr.comp_mask = comp_mask; + + ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, + mad->data); + + *sa_query = &query->sa_query; + + ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); + if (ret < 0) + goto err2; + + return ret; + +err2: + *sa_query = NULL; + ib_sa_client_put(query->sa_query.client); + free_mad(&query->sa_query); + +err1: + kfree(query); + return ret; +} +EXPORT_SYMBOL(ib_sa_guid_info_rec_query); + static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c61bca30fd2..cbd0383f622 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -179,32 +179,36 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, { struct ib_port_attr attr; char *speed = ""; - int rate; + int rate; /* in deci-Gb/sec */ ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; - rate = (25 * attr.active_speed) / 10; - switch (attr.active_speed) { - case 2: + case IB_SPEED_DDR: speed = " DDR"; + rate = 50; break; - case 4: + case IB_SPEED_QDR: speed = " QDR"; + rate = 100; break; - case 8: + case IB_SPEED_FDR10: speed = " FDR10"; - rate = 10; + rate = 100; break; - case 16: + case IB_SPEED_FDR: speed = " FDR"; - rate = 14; + rate = 140; break; - case 32: + case IB_SPEED_EDR: speed = " EDR"; + rate = 250; + break; + case IB_SPEED_SDR: + default: /* default to SDR for invalid rates */ rate = 25; break; } @@ -214,7 +218,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate, (attr.active_speed == 1) ? ".5" : "", + rate / 10, rate % 10 ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } @@ -425,15 +429,19 @@ static void ib_port_release(struct kobject *kobj) struct attribute *a; int i; - for (i = 0; (a = p->gid_group.attrs[i]); ++i) - kfree(a); + if (p->gid_group.attrs) { + for (i = 0; (a = p->gid_group.attrs[i]); ++i) + kfree(a); - kfree(p->gid_group.attrs); + kfree(p->gid_group.attrs); + } - for (i = 0; (a = p->pkey_group.attrs[i]); ++i) - kfree(a); + if (p->pkey_group.attrs) { + for (i = 0; (a = p->pkey_group.attrs[i]); ++i) + kfree(a); - kfree(p->pkey_group.attrs); + kfree(p->pkey_group.attrs); + } kfree(p); } @@ -530,10 +538,12 @@ static int add_port(struct ib_device *device, int port_num, p->port_num = port_num; ret = kobject_init_and_add(&p->kobj, &port_type, - kobject_get(device->ports_parent), + device->ports_parent, "%d", port_num); - if (ret) - goto err_put; + if (ret) { + kfree(p); + return ret; + } ret = sysfs_create_group(&p->kobj, &pma_group); if (ret) @@ -541,8 +551,10 @@ static int add_port(struct ib_device *device, int port_num, p->gid_group.name = "gids"; p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len); - if (!p->gid_group.attrs) + if (!p->gid_group.attrs) { + ret = -ENOMEM; goto err_remove_pma; + } ret = sysfs_create_group(&p->kobj, &p->gid_group); if (ret) @@ -551,8 +563,10 @@ static int add_port(struct ib_device *device, int port_num, p->pkey_group.name = "pkeys"; p->pkey_group.attrs = alloc_group_attrs(show_port_pkey, attr.pkey_tbl_len); - if (!p->pkey_group.attrs) + if (!p->pkey_group.attrs) { + ret = -ENOMEM; goto err_remove_gid; + } ret = sysfs_create_group(&p->kobj, &p->pkey_group); if (ret) @@ -577,6 +591,7 @@ err_free_pkey: kfree(p->pkey_group.attrs[i]); kfree(p->pkey_group.attrs); + p->pkey_group.attrs = NULL; err_remove_gid: sysfs_remove_group(&p->kobj, &p->gid_group); @@ -586,13 +601,13 @@ err_free_gid: kfree(p->gid_group.attrs[i]); kfree(p->gid_group.attrs); + p->gid_group.attrs = NULL; err_remove_pma: sysfs_remove_group(&p->kobj, &pma_group); err_put: - kobject_put(device->ports_parent); - kfree(p); + kobject_put(&p->kobj); return ret; } @@ -604,6 +619,8 @@ static ssize_t show_node_type(struct device *device, switch (dev->node_type) { case RDMA_NODE_IB_CA: return sprintf(buf, "%d: CA\n", dev->node_type); case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type); + case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type); + case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type); case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type); case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type); default: return sprintf(buf, "%d: <unknown>\n", dev->node_type); @@ -799,6 +816,22 @@ static struct attribute_group iw_stats_group = { .attrs = iw_proto_stats_attrs, }; +static void free_port_list_attributes(struct ib_device *device) +{ + struct kobject *p, *t; + + list_for_each_entry_safe(p, t, &device->port_list, entry) { + struct ib_port *port = container_of(p, struct ib_port, kobj); + list_del(&p->entry); + sysfs_remove_group(p, &pma_group); + sysfs_remove_group(p, &port->pkey_group); + sysfs_remove_group(p, &port->gid_group); + kobject_put(p); + } + + kobject_put(device->ports_parent); +} + int ib_device_register_sysfs(struct ib_device *device, int (*port_callback)(struct ib_device *, u8, struct kobject *)) @@ -809,7 +842,7 @@ int ib_device_register_sysfs(struct ib_device *device, class_dev->class = &ib_class; class_dev->parent = device->dma_device; - dev_set_name(class_dev, device->name); + dev_set_name(class_dev, "%s", device->name); dev_set_drvdata(class_dev, device); INIT_LIST_HEAD(&device->port_list); @@ -825,7 +858,7 @@ int ib_device_register_sysfs(struct ib_device *device, } device->ports_parent = kobject_create_and_add("ports", - kobject_get(&class_dev->kobj)); + &class_dev->kobj); if (!device->ports_parent) { ret = -ENOMEM; goto err_put; @@ -852,21 +885,7 @@ int ib_device_register_sysfs(struct ib_device *device, return 0; err_put: - { - struct kobject *p, *t; - struct ib_port *port; - - list_for_each_entry_safe(p, t, &device->port_list, entry) { - list_del(&p->entry); - port = container_of(p, struct ib_port, kobj); - sysfs_remove_group(p, &pma_group); - sysfs_remove_group(p, &port->pkey_group); - sysfs_remove_group(p, &port->gid_group); - kobject_put(p); - } - } - - kobject_put(&class_dev->kobj); + free_port_list_attributes(device); err_unregister: device_unregister(class_dev); @@ -877,22 +896,18 @@ err: void ib_device_unregister_sysfs(struct ib_device *device) { - struct kobject *p, *t; - struct ib_port *port; - /* Hold kobject until ib_dealloc_device() */ - kobject_get(&device->dev.kobj); + struct kobject *kobj_dev = kobject_get(&device->dev.kobj); + int i; - list_for_each_entry_safe(p, t, &device->port_list, entry) { - list_del(&p->entry); - port = container_of(p, struct ib_port, kobj); - sysfs_remove_group(p, &pma_group); - sysfs_remove_group(p, &port->pkey_group); - sysfs_remove_group(p, &port->gid_group); - kobject_put(p); - } + if (device->node_type == RDMA_NODE_RNIC && device->get_protocol_stats) + sysfs_remove_group(kobj_dev, &iw_stats_group); + + free_port_list_attributes(device); + + for (i = 0; i < ARRAY_SIZE(ib_class_attributes); ++i) + device_remove_file(&device->dev, ib_class_attributes[i]); - kobject_put(device->ports_parent); device_unregister(&device->dev); } diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index 06f08713f48..f2f63933e8a 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -176,7 +176,6 @@ static void ib_ucm_cleanup_events(struct ib_ucm_context *ctx) static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) { struct ib_ucm_context *ctx; - int result; ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) @@ -187,17 +186,10 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file) ctx->file = file; INIT_LIST_HEAD(&ctx->events); - do { - result = idr_pre_get(&ctx_id_table, GFP_KERNEL); - if (!result) - goto error; - - mutex_lock(&ctx_id_mutex); - result = idr_get_new(&ctx_id_table, ctx, &ctx->id); - mutex_unlock(&ctx_id_mutex); - } while (result == -EAGAIN); - - if (result) + mutex_lock(&ctx_id_mutex); + ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&ctx_id_mutex); + if (ctx->id < 0) goto error; list_add_tail(&ctx->file_list, &file->ctxs); @@ -397,7 +389,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file, struct ib_ucm_event_get cmd; struct ib_ucm_event *uevent; int result = 0; - DEFINE_WAIT(wait); if (out_len < sizeof(struct ib_ucm_event_resp)) return -ENOSPC; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 5034a87cc72..56a4b7ca7ee 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -47,6 +47,8 @@ #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_cm_ib.h> +#include <rdma/ib_addr.h> +#include <rdma/ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -55,7 +57,7 @@ MODULE_LICENSE("Dual BSD/GPL"); static unsigned int max_backlog = 1024; static struct ctl_table_header *ucma_ctl_table_hdr; -static ctl_table ucma_ctl_table[] = { +static struct ctl_table ucma_ctl_table[] = { { .procname = "max_backlog", .data = &max_backlog, @@ -66,12 +68,6 @@ static ctl_table ucma_ctl_table[] = { { } }; -static struct ctl_path ucma_ctl_path[] = { - { .procname = "net" }, - { .procname = "rdma_ucm" }, - { } -}; - struct ucma_file { struct mutex mut; struct file *filp; @@ -151,7 +147,6 @@ static void ucma_put_ctx(struct ucma_context *ctx) static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) { struct ucma_context *ctx; - int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -162,17 +157,10 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_LIST_HEAD(&ctx->mc_list); ctx->file = file; - do { - ret = idr_pre_get(&ctx_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&ctx_idr, ctx, &ctx->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + ctx->id = idr_alloc(&ctx_idr, ctx, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (ctx->id < 0) goto error; list_add_tail(&ctx->list, &file->ctx_list); @@ -186,23 +174,15 @@ error: static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) { struct ucma_multicast *mc; - int ret; mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return NULL; - do { - ret = idr_pre_get(&multicast_idr, GFP_KERNEL); - if (!ret) - goto error; - - mutex_lock(&mut); - ret = idr_get_new(&multicast_idr, mc, &mc->id); - mutex_unlock(&mut); - } while (ret == -EAGAIN); - - if (ret) + mutex_lock(&mut); + mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL); + mutex_unlock(&mut); + if (mc->id < 0) goto error; mc->ctx = ctx; @@ -273,6 +253,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, if (!uevent) return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; + mutex_lock(&ctx->file->mut); uevent->cm_id = cm_id; ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; @@ -283,7 +264,6 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, ucma_copy_conn_event(&uevent->resp.param.conn, &event->param.conn); - mutex_lock(&ctx->file->mut); if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { if (!ctx->backlog) { ret = -ENOMEM; @@ -291,7 +271,7 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, goto out; } ctx->backlog--; - } else if (!ctx->uid) { + } else if (!ctx->uid || ctx->cm_id != cm_id) { /* * We ignore events for new connections until userspace has set * their context. This can only happen if an error occurs on a @@ -316,7 +296,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, struct rdma_ucm_get_event cmd; struct ucma_event *uevent; int ret = 0; - DEFINE_WAIT(wait); if (out_len < sizeof uevent->resp) return -ENOSPC; @@ -449,24 +428,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx) mutex_unlock(&mut); } -static void ucma_cleanup_events(struct ucma_context *ctx) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx != ctx) - continue; - - list_del(&uevent->list); - - /* clear incoming connections. */ - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) - rdma_destroy_id(uevent->cm_id); - - kfree(uevent); - } -} - static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; @@ -480,9 +441,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) } } +/* + * We cannot hold file->mut when calling rdma_destroy_id() or we can + * deadlock. We also acquire file->mut in ucma_event_handler(), and + * rdma_destroy_id() will wait until all callbacks have completed. + */ static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; + struct ucma_event *uevent, *tmp; + LIST_HEAD(list); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); @@ -491,10 +459,20 @@ static int ucma_free_ctx(struct ucma_context *ctx) /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); - ucma_cleanup_events(ctx); + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &list); + } list_del(&ctx->list); mutex_unlock(&ctx->file->mut); + list_for_each_entry_safe(uevent, tmp, &list, list) { + list_del(&uevent->list); + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + kfree(uevent); + } + events_reported = ctx->events_reported; kfree(ctx); return events_reported; @@ -534,10 +512,10 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, return ret; } -static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, +static ssize_t ucma_bind_ip(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_bind_addr cmd; + struct rdma_ucm_bind_ip cmd; struct ucma_context *ctx; int ret; @@ -553,24 +531,75 @@ static ssize_t ucma_bind_addr(struct ucma_file *file, const char __user *inbuf, return ret; } +static ssize_t ucma_bind(struct ucma_file *file, const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_bind cmd; + struct sockaddr *addr; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + addr = (struct sockaddr *) &cmd.addr; + if (cmd.reserved || !cmd.addr_size || (cmd.addr_size != rdma_addr_size(addr))) + return -EINVAL; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_bind_addr(ctx->cm_id, addr); + ucma_put_ctx(ctx); + return ret; +} + +static ssize_t ucma_resolve_ip(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_resolve_ip cmd; + struct ucma_context *ctx; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, + (struct sockaddr *) &cmd.dst_addr, + cmd.timeout_ms); + ucma_put_ctx(ctx); + return ret; +} + static ssize_t ucma_resolve_addr(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_resolve_addr cmd; + struct sockaddr *src, *dst; struct ucma_context *ctx; int ret; if (copy_from_user(&cmd, inbuf, sizeof(cmd))) return -EFAULT; + src = (struct sockaddr *) &cmd.src_addr; + dst = (struct sockaddr *) &cmd.dst_addr; + if (cmd.reserved || (cmd.src_size && (cmd.src_size != rdma_addr_size(src))) || + !cmd.dst_size || (cmd.dst_size != rdma_addr_size(dst))) + return -EINVAL; + ctx = ucma_get_ctx(file, cmd.id); if (IS_ERR(ctx)) return PTR_ERR(ctx); - ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, - (struct sockaddr *) &cmd.dst_addr, - cmd.timeout_ms); + ret = rdma_resolve_addr(ctx->cm_id, src, dst, cmd.timeout_ms); ucma_put_ctx(ctx); return ret; } @@ -626,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - struct rdma_dev_addr *dev_addr; - struct net_device *dev; - u16 vid = 0; resp->num_paths = route->num_paths; switch (route->num_paths) { case 0: - dev_addr = &route->addr.dev_addr; - dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); - if (dev) { - vid = rdma_vlan_dev_vlan_id(dev); - dev_put(dev); - } - - iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid, - dev_addr->dst_dev_addr, vid); - iboe_addr_get_sgid(dev_addr, - (union ib_gid *) &resp->ib_route[0].sgid); + rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, + (union ib_gid *)&resp->ib_route[0].dgid); + rdma_ip2gid((struct sockaddr *)&route->addr.src_addr, + (union ib_gid *)&resp->ib_route[0].sgid); resp->ib_route[0].pkey = cpu_to_be16(0xffff); break; case 2: @@ -673,7 +692,7 @@ static ssize_t ucma_query_route(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct rdma_ucm_query_route cmd; + struct rdma_ucm_query cmd; struct rdma_ucm_query_route_resp resp; struct ucma_context *ctx; struct sockaddr *addr; @@ -733,7 +752,162 @@ out: return ret; } -static void ucma_copy_conn_param(struct rdma_conn_param *dst, +static void ucma_query_device_addr(struct rdma_cm_id *cm_id, + struct rdma_ucm_query_addr_resp *resp) +{ + if (!cm_id->device) + return; + + resp->node_guid = (__force __u64) cm_id->device->node_guid; + resp->port_num = cm_id->port_num; + resp->pkey = (__force __u16) cpu_to_be16( + ib_addr_get_pkey(&cm_id->route.addr.dev_addr)); +} + +static ssize_t ucma_query_addr(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof resp); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; + resp.src_size = rdma_addr_size(addr); + memcpy(&resp.src_addr, addr, resp.src_size); + + addr = (struct sockaddr *) &ctx->cm_id->route.addr.dst_addr; + resp.dst_size = rdma_addr_size(addr); + memcpy(&resp.dst_addr, addr, resp.dst_size); + + ucma_query_device_addr(ctx->cm_id, &resp); + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query_path(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_path_resp *resp; + int i, ret = 0; + + if (out_len < sizeof(*resp)) + return -ENOSPC; + + resp = kzalloc(out_len, GFP_KERNEL); + if (!resp) + return -ENOMEM; + + resp->num_paths = ctx->cm_id->route.num_paths; + for (i = 0, out_len -= sizeof(*resp); + i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); + i++, out_len -= sizeof(struct ib_path_rec_data)) { + + resp->path_data[i].flags = IB_PATH_GMP | IB_PATH_PRIMARY | + IB_PATH_BIDIRECTIONAL; + ib_sa_pack_path(&ctx->cm_id->route.path_rec[i], + &resp->path_data[i].path_rec); + } + + if (copy_to_user(response, resp, + sizeof(*resp) + (i * sizeof(struct ib_path_rec_data)))) + ret = -EFAULT; + + kfree(resp); + return ret; +} + +static ssize_t ucma_query_gid(struct ucma_context *ctx, + void __user *response, int out_len) +{ + struct rdma_ucm_query_addr_resp resp; + struct sockaddr_ib *addr; + int ret = 0; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + memset(&resp, 0, sizeof resp); + + ucma_query_device_addr(ctx->cm_id, &resp); + + addr = (struct sockaddr_ib *) &resp.src_addr; + resp.src_size = sizeof(*addr); + if (ctx->cm_id->route.addr.src_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.src_addr, resp.src_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_sgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.src_addr); + } + + addr = (struct sockaddr_ib *) &resp.dst_addr; + resp.dst_size = sizeof(*addr); + if (ctx->cm_id->route.addr.dst_addr.ss_family == AF_IB) { + memcpy(addr, &ctx->cm_id->route.addr.dst_addr, resp.dst_size); + } else { + addr->sib_family = AF_IB; + addr->sib_pkey = (__force __be16) resp.pkey; + rdma_addr_get_dgid(&ctx->cm_id->route.addr.dev_addr, + (union ib_gid *) &addr->sib_addr); + addr->sib_sid = rdma_get_service_id(ctx->cm_id, (struct sockaddr *) + &ctx->cm_id->route.addr.dst_addr); + } + + if (copy_to_user(response, &resp, sizeof(resp))) + ret = -EFAULT; + + return ret; +} + +static ssize_t ucma_query(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_query cmd; + struct ucma_context *ctx; + void __user *response; + int ret; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + response = (void __user *)(unsigned long) cmd.response; + ctx = ucma_get_ctx(file, cmd.id); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + switch (cmd.option) { + case RDMA_USER_CM_QUERY_ADDR: + ret = ucma_query_addr(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_PATH: + ret = ucma_query_path(ctx, response, out_len); + break; + case RDMA_USER_CM_QUERY_GID: + ret = ucma_query_gid(ctx, response, out_len); + break; + default: + ret = -ENOSYS; + break; + } + + ucma_put_ctx(ctx); + return ret; +} + +static void ucma_copy_conn_param(struct rdma_cm_id *id, + struct rdma_conn_param *dst, struct rdma_ucm_conn_param *src) { dst->private_data = src->private_data; @@ -745,6 +919,7 @@ static void ucma_copy_conn_param(struct rdma_conn_param *dst, dst->rnr_retry_count = src->rnr_retry_count; dst->srq = src->srq; dst->qp_num = src->qp_num; + dst->qkey = (id->route.addr.src_addr.ss_family == AF_IB) ? src->qkey : 0; } static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, @@ -765,7 +940,7 @@ static ssize_t ucma_connect(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); ret = rdma_connect(ctx->cm_id, &conn_param); ucma_put_ctx(ctx); return ret; @@ -808,7 +983,7 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ucma_copy_conn_param(&conn_param, &cmd.conn_param); + ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); if (!ret) @@ -916,6 +1091,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, } ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); break; + case RDMA_OPTION_ID_AFONLY: + if (optlen != sizeof(int)) { + ret = -EINVAL; + break; + } + ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0); + break; default: ret = -ENOSYS; } @@ -1002,23 +1184,18 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - optval = kmalloc(cmd.optlen, GFP_KERNEL); - if (!optval) { - ret = -ENOMEM; - goto out1; - } - - if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval, - cmd.optlen)) { - ret = -EFAULT; - goto out2; + optval = memdup_user((void __user *) (unsigned long) cmd.optval, + cmd.optlen); + if (IS_ERR(optval)) { + ret = PTR_ERR(optval); + goto out; } ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, cmd.optlen); -out2: kfree(optval); -out1: + +out: ucma_put_ctx(ctx); return ret; } @@ -1042,23 +1219,23 @@ static ssize_t ucma_notify(struct ucma_file *file, const char __user *inbuf, return ret; } -static ssize_t ucma_join_multicast(struct ucma_file *file, - const char __user *inbuf, - int in_len, int out_len) +static ssize_t ucma_process_join(struct ucma_file *file, + struct rdma_ucm_join_mcast *cmd, int out_len) { - struct rdma_ucm_join_mcast cmd; struct rdma_ucm_create_id_resp resp; struct ucma_context *ctx; struct ucma_multicast *mc; + struct sockaddr *addr; int ret; if (out_len < sizeof(resp)) return -ENOSPC; - if (copy_from_user(&cmd, inbuf, sizeof(cmd))) - return -EFAULT; + addr = (struct sockaddr *) &cmd->addr; + if (cmd->reserved || !cmd->addr_size || (cmd->addr_size != rdma_addr_size(addr))) + return -EINVAL; - ctx = ucma_get_ctx(file, cmd.id); + ctx = ucma_get_ctx(file, cmd->id); if (IS_ERR(ctx)) return PTR_ERR(ctx); @@ -1069,14 +1246,14 @@ static ssize_t ucma_join_multicast(struct ucma_file *file, goto err1; } - mc->uid = cmd.uid; - memcpy(&mc->addr, &cmd.addr, sizeof cmd.addr); + mc->uid = cmd->uid; + memcpy(&mc->addr, addr, cmd->addr_size); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr, mc); if (ret) goto err2; resp.id = mc->id; - if (copy_to_user((void __user *)(unsigned long)cmd.response, + if (copy_to_user((void __user *)(unsigned long) cmd->response, &resp, sizeof(resp))) { ret = -EFAULT; goto err3; @@ -1101,6 +1278,38 @@ err1: return ret; } +static ssize_t ucma_join_ip_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_ip_mcast cmd; + struct rdma_ucm_join_mcast join_cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + join_cmd.response = cmd.response; + join_cmd.uid = cmd.uid; + join_cmd.id = cmd.id; + join_cmd.addr_size = rdma_addr_size((struct sockaddr *) &cmd.addr); + join_cmd.reserved = 0; + memcpy(&join_cmd.addr, &cmd.addr, join_cmd.addr_size); + + return ucma_process_join(file, &join_cmd, out_len); +} + +static ssize_t ucma_join_multicast(struct ucma_file *file, + const char __user *inbuf, + int in_len, int out_len) +{ + struct rdma_ucm_join_mcast cmd; + + if (copy_from_user(&cmd, inbuf, sizeof(cmd))) + return -EFAULT; + + return ucma_process_join(file, &cmd, out_len); +} + static ssize_t ucma_leave_multicast(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) @@ -1189,7 +1398,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; struct ucma_context *ctx; - struct file *filp; + struct fd f; struct ucma_file *cur_file; int ret = 0; @@ -1197,12 +1406,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, return -EFAULT; /* Get current fd to protect against it being closed */ - filp = fget(cmd.fd); - if (!filp) + f = fdget(cmd.fd); + if (!f.file) return -ENOENT; /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(filp->private_data, cmd.id); + ctx = ucma_get_ctx(f.file->private_data, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; @@ -1236,32 +1445,36 @@ response: ucma_put_ctx(ctx); file_put: - fput(filp); + fdput(f); return ret; } static ssize_t (*ucma_cmd_table[])(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) = { - [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, - [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, - [RDMA_USER_CM_CMD_BIND_ADDR] = ucma_bind_addr, - [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, - [RDMA_USER_CM_CMD_RESOLVE_ROUTE]= ucma_resolve_route, - [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, - [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, - [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, - [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, - [RDMA_USER_CM_CMD_REJECT] = ucma_reject, - [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, - [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, - [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, - [RDMA_USER_CM_CMD_GET_OPTION] = NULL, - [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, - [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, - [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast, - [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, - [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id + [RDMA_USER_CM_CMD_CREATE_ID] = ucma_create_id, + [RDMA_USER_CM_CMD_DESTROY_ID] = ucma_destroy_id, + [RDMA_USER_CM_CMD_BIND_IP] = ucma_bind_ip, + [RDMA_USER_CM_CMD_RESOLVE_IP] = ucma_resolve_ip, + [RDMA_USER_CM_CMD_RESOLVE_ROUTE] = ucma_resolve_route, + [RDMA_USER_CM_CMD_QUERY_ROUTE] = ucma_query_route, + [RDMA_USER_CM_CMD_CONNECT] = ucma_connect, + [RDMA_USER_CM_CMD_LISTEN] = ucma_listen, + [RDMA_USER_CM_CMD_ACCEPT] = ucma_accept, + [RDMA_USER_CM_CMD_REJECT] = ucma_reject, + [RDMA_USER_CM_CMD_DISCONNECT] = ucma_disconnect, + [RDMA_USER_CM_CMD_INIT_QP_ATTR] = ucma_init_qp_attr, + [RDMA_USER_CM_CMD_GET_EVENT] = ucma_get_event, + [RDMA_USER_CM_CMD_GET_OPTION] = NULL, + [RDMA_USER_CM_CMD_SET_OPTION] = ucma_set_option, + [RDMA_USER_CM_CMD_NOTIFY] = ucma_notify, + [RDMA_USER_CM_CMD_JOIN_IP_MCAST] = ucma_join_ip_multicast, + [RDMA_USER_CM_CMD_LEAVE_MCAST] = ucma_leave_multicast, + [RDMA_USER_CM_CMD_MIGRATE_ID] = ucma_migrate_id, + [RDMA_USER_CM_CMD_QUERY] = ucma_query, + [RDMA_USER_CM_CMD_BIND] = ucma_bind, + [RDMA_USER_CM_CMD_RESOLVE_ADDR] = ucma_resolve_addr, + [RDMA_USER_CM_CMD_JOIN_MCAST] = ucma_join_multicast }; static ssize_t ucma_write(struct file *filp, const char __user *buf, @@ -1393,7 +1606,7 @@ static int __init ucma_init(void) goto err1; } - ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table); + ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); if (!ucma_ctl_table_hdr) { printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); ret = -ENOMEM; @@ -1409,7 +1622,7 @@ err1: static void __exit ucma_cleanup(void) { - unregister_sysctl_table(ucma_ctl_table_hdr); + unregister_net_sysctl_table(ucma_ctl_table_hdr); device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); misc_deregister(&ucma_misc); idr_destroy(&ctx_idr); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 71f0c0f7df9..a3a2e9c1639 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -42,29 +42,29 @@ #include "uverbs.h" -#define IB_UMEM_MAX_PAGE_CHUNK \ - ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ - ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ - (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) { - struct ib_umem_chunk *chunk, *tmp; + struct scatterlist *sg; + struct page *page; int i; - list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { - ib_dma_unmap_sg(dev, chunk->page_list, - chunk->nents, DMA_BIDIRECTIONAL); - for (i = 0; i < chunk->nents; ++i) { - struct page *page = sg_page(&chunk->page_list[i]); + if (umem->nmap > 0) + ib_dma_unmap_sg(dev, umem->sg_head.sgl, + umem->nmap, + DMA_BIDIRECTIONAL); - if (umem->writable && dirty) - set_page_dirty_lock(page); - put_page(page); - } + for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) { - kfree(chunk); + page = sg_page(sg); + if (umem->writable && dirty) + set_page_dirty_lock(page); + put_page(page); } + + sg_free_table(&umem->sg_head); + return; + } /** @@ -81,15 +81,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, struct ib_umem *umem; struct page **page_list; struct vm_area_struct **vma_list; - struct ib_umem_chunk *chunk; unsigned long locked; unsigned long lock_limit; unsigned long cur_base; unsigned long npages; int ret; - int off; int i; DEFINE_DMA_ATTRS(attrs); + struct scatterlist *sg, *sg_list_start; + int need_release = 0; if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); @@ -97,7 +97,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (!can_do_mlock()) return ERR_PTR(-EPERM); - umem = kmalloc(sizeof *umem, GFP_KERNEL); + umem = kzalloc(sizeof *umem, GFP_KERNEL); if (!umem) return ERR_PTR(-ENOMEM); @@ -117,8 +117,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, /* We assume the memory is from hugetlb until proved otherwise */ umem->hugetlb = 1; - INIT_LIST_HEAD(&umem->chunk_list); - page_list = (struct page **) __get_free_page(GFP_KERNEL); if (!page_list) { kfree(umem); @@ -147,7 +145,18 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, cur_base = addr & PAGE_MASK; - ret = 0; + if (npages == 0) { + ret = -EINVAL; + goto out; + } + + ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); + if (ret) + goto out; + + need_release = 1; + sg_list_start = umem->sg_head.sgl; + while (npages) { ret = get_user_pages(current, current->mm, cur_base, min_t(unsigned long, npages, @@ -157,54 +166,38 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret < 0) goto out; + umem->npages += ret; cur_base += ret * PAGE_SIZE; npages -= ret; - off = 0; - - while (ret) { - chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * - min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), - GFP_KERNEL); - if (!chunk) { - ret = -ENOMEM; - goto out; - } - - chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); - sg_init_table(chunk->page_list, chunk->nents); - for (i = 0; i < chunk->nents; ++i) { - if (vma_list && - !is_vm_hugetlb_page(vma_list[i + off])) - umem->hugetlb = 0; - sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0); - } - - chunk->nmap = ib_dma_map_sg_attrs(context->device, - &chunk->page_list[0], - chunk->nents, - DMA_BIDIRECTIONAL, - &attrs); - if (chunk->nmap <= 0) { - for (i = 0; i < chunk->nents; ++i) - put_page(sg_page(&chunk->page_list[i])); - kfree(chunk); - - ret = -ENOMEM; - goto out; - } - - ret -= chunk->nents; - off += chunk->nents; - list_add_tail(&chunk->list, &umem->chunk_list); + for_each_sg(sg_list_start, sg, ret, i) { + if (vma_list && !is_vm_hugetlb_page(vma_list[i])) + umem->hugetlb = 0; + + sg_set_page(sg, page_list[i], PAGE_SIZE, 0); } - ret = 0; + /* preparing for next loop */ + sg_list_start = sg; } + umem->nmap = ib_dma_map_sg_attrs(context->device, + umem->sg_head.sgl, + umem->npages, + DMA_BIDIRECTIONAL, + &attrs); + + if (umem->nmap <= 0) { + ret = -ENOMEM; + goto out; + } + + ret = 0; + out: if (ret < 0) { - __ib_umem_release(context->device, umem, 0); + if (need_release) + __ib_umem_release(context->device, umem, 0); kfree(umem); } else current->mm->pinned_vm = locked; @@ -269,7 +262,7 @@ void ib_umem_release(struct ib_umem *umem) } else down_write(&mm->mmap_sem); - current->mm->locked_vm -= diff; + current->mm->pinned_vm -= diff; up_write(&mm->mmap_sem); mmput(mm); kfree(umem); @@ -278,17 +271,16 @@ EXPORT_SYMBOL(ib_umem_release); int ib_umem_page_count(struct ib_umem *umem) { - struct ib_umem_chunk *chunk; int shift; int i; int n; + struct scatterlist *sg; shift = ilog2(umem->page_size); n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (i = 0; i < chunk->nmap; ++i) - n += sg_dma_len(&chunk->page_list[i]) >> shift; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) + n += sg_dma_len(sg) >> shift; return n; } diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index f0d588f8859..1acb9910055 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -98,7 +98,7 @@ struct ib_umad_port { struct ib_umad_device { int start_port, end_port; - struct kref ref; + struct kobject kobj; struct ib_umad_port port[0]; }; @@ -134,14 +134,18 @@ static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS); static void ib_umad_add_one(struct ib_device *device); static void ib_umad_remove_one(struct ib_device *device); -static void ib_umad_release_dev(struct kref *ref) +static void ib_umad_release_dev(struct kobject *kobj) { struct ib_umad_device *dev = - container_of(ref, struct ib_umad_device, ref); + container_of(kobj, struct ib_umad_device, kobj); kfree(dev); } +static struct kobj_type ib_umad_dev_ktype = { + .release = ib_umad_release_dev, +}; + static int hdr_size(struct ib_umad_file *file) { return file->use_pkey_index ? sizeof (struct ib_user_mad_hdr) : @@ -780,27 +784,19 @@ static int ib_umad_open(struct inode *inode, struct file *filp) { struct ib_umad_port *port; struct ib_umad_file *file; - int ret; + int ret = -ENXIO; port = container_of(inode->i_cdev, struct ib_umad_port, cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; mutex_lock(&port->file_mutex); - if (!port->ib_dev) { - ret = -ENXIO; + if (!port->ib_dev) goto out; - } + ret = -ENOMEM; file = kzalloc(sizeof *file, GFP_KERNEL); - if (!file) { - kref_put(&port->umad_dev->ref, ib_umad_release_dev); - ret = -ENOMEM; + if (!file) goto out; - } mutex_init(&file->mutex); spin_lock_init(&file->send_lock); @@ -814,6 +810,13 @@ static int ib_umad_open(struct inode *inode, struct file *filp) list_add_tail(&file->port_list, &port->file_list); ret = nonseekable_open(inode, filp); + if (ret) { + list_del(&file->port_list); + kfree(file); + goto out; + } + + kobject_get(&port->umad_dev->kobj); out: mutex_unlock(&port->file_mutex); @@ -852,7 +855,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp) mutex_unlock(&file->port->file_mutex); kfree(file); - kref_put(&dev->ref, ib_umad_release_dev); + kobject_put(&dev->kobj); return 0; } @@ -880,10 +883,6 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) int ret; port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev); - if (port) - kref_get(&port->umad_dev->ref); - else - return -ENXIO; if (filp->f_flags & O_NONBLOCK) { if (down_trylock(&port->sm_sem)) { @@ -898,17 +897,27 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp) } ret = ib_modify_port(port->ib_dev, port->port_num, 0, &props); - if (ret) { - up(&port->sm_sem); - goto fail; - } + if (ret) + goto err_up_sem; filp->private_data = port; - return nonseekable_open(inode, filp); + ret = nonseekable_open(inode, filp); + if (ret) + goto err_clr_sm_cap; + + kobject_get(&port->umad_dev->kobj); + + return 0; + +err_clr_sm_cap: + swap(props.set_port_cap_mask, props.clr_port_cap_mask); + ib_modify_port(port->ib_dev, port->port_num, 0, &props); + +err_up_sem: + up(&port->sm_sem); fail: - kref_put(&port->umad_dev->ref, ib_umad_release_dev); return ret; } @@ -927,7 +936,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp) up(&port->sm_sem); - kref_put(&port->umad_dev->ref, ib_umad_release_dev); + kobject_put(&port->umad_dev->kobj); return ret; } @@ -995,6 +1004,7 @@ static int find_overflow_devnum(void) } static int ib_umad_init_port(struct ib_device *device, int port_num, + struct ib_umad_device *umad_dev, struct ib_umad_port *port) { int devnum; @@ -1027,6 +1037,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, cdev_init(&port->cdev, &umad_fops); port->cdev.owner = THIS_MODULE; + port->cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num); if (cdev_add(&port->cdev, base, 1)) goto err_cdev; @@ -1045,6 +1056,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num, base += IB_UMAD_MAX_PORTS; cdev_init(&port->sm_cdev, &umad_sm_fops); port->sm_cdev.owner = THIS_MODULE; + port->sm_cdev.kobj.parent = &umad_dev->kobj; kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num); if (cdev_add(&port->sm_cdev, base, 1)) goto err_sm_cdev; @@ -1138,7 +1150,7 @@ static void ib_umad_add_one(struct ib_device *device) if (!umad_dev) return; - kref_init(&umad_dev->ref); + kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype); umad_dev->start_port = s; umad_dev->end_port = e; @@ -1146,7 +1158,8 @@ static void ib_umad_add_one(struct ib_device *device) for (i = s; i <= e; ++i) { umad_dev->port[i - s].umad_dev = umad_dev; - if (ib_umad_init_port(device, i, &umad_dev->port[i - s])) + if (ib_umad_init_port(device, i, umad_dev, + &umad_dev->port[i - s])) goto err; } @@ -1158,7 +1171,7 @@ err: while (--i >= s) ib_umad_kill_port(&umad_dev->port[i - s]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static void ib_umad_remove_one(struct ib_device *device) @@ -1172,7 +1185,7 @@ static void ib_umad_remove_one(struct ib_device *device) for (i = 0; i <= umad_dev->end_port - umad_dev->start_port; ++i) ib_umad_kill_port(&umad_dev->port[i]); - kref_put(&umad_dev->ref, ib_umad_release_dev); + kobject_put(&umad_dev->kobj); } static char *umad_devnode(struct device *dev, umode_t *mode) diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 5bcb2afd3dc..a283274a5a0 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -47,6 +47,22 @@ #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (const void __user *) (ibuf); \ + (udata)->outbuf = (void __user *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + +#define INIT_UDATA_BUF_OR_NULL(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (ilen) ? (const void __user *) (ibuf) : NULL; \ + (udata)->outbuf = (olen) ? (void __user *) (obuf) : NULL; \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + /* * Our lifetime rules for these structs are the following: * @@ -135,6 +151,7 @@ struct ib_usrq_object { struct ib_uqp_object { struct ib_uevent_object uevent; struct list_head mcast_list; + struct ib_uxrcd_object *uxrcd; }; struct ib_ucq_object { @@ -155,6 +172,7 @@ extern struct idr ib_uverbs_cq_idr; extern struct idr ib_uverbs_qp_idr; extern struct idr ib_uverbs_srq_idr; extern struct idr ib_uverbs_xrcd_idr; +extern struct idr ib_uverbs_rule_idr; void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); @@ -176,6 +194,22 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler, struct ib_event *event); void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd); +struct ib_uverbs_flow_spec { + union { + union { + struct ib_uverbs_flow_spec_hdr hdr; + struct { + __u32 type; + __u16 size; + __u16 reserved; + }; + }; + struct ib_uverbs_flow_spec_eth eth; + struct ib_uverbs_flow_spec_ipv4 ipv4; + struct ib_uverbs_flow_spec_tcp_udp tcp_udp; + }; +}; + #define IB_UVERBS_DECLARE_CMD(name) \ ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ const char __user *buf, int in_len, \ @@ -188,6 +222,8 @@ IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(alloc_mw); +IB_UVERBS_DECLARE_CMD(dealloc_mw); IB_UVERBS_DECLARE_CMD(create_comp_channel); IB_UVERBS_DECLARE_CMD(create_cq); IB_UVERBS_DECLARE_CMD(resize_cq); @@ -214,4 +250,12 @@ IB_UVERBS_DECLARE_CMD(create_xsrq); IB_UVERBS_DECLARE_CMD(open_xrcd); IB_UVERBS_DECLARE_CMD(close_xrcd); +#define IB_UVERBS_DECLARE_EX_CMD(name) \ + int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \ + struct ib_udata *ucore, \ + struct ib_udata *uhw) + +IB_UVERBS_DECLARE_EX_CMD(create_flow); +IB_UVERBS_DECLARE_EX_CMD(destroy_flow); + #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 4d27e4c3fe3..ea6203ee7bc 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -40,22 +40,22 @@ #include <asm/uaccess.h> #include "uverbs.h" +#include "core_priv.h" -static struct lock_class_key pd_lock_key; -static struct lock_class_key mr_lock_key; -static struct lock_class_key cq_lock_key; -static struct lock_class_key qp_lock_key; -static struct lock_class_key ah_lock_key; -static struct lock_class_key srq_lock_key; -static struct lock_class_key xrcd_lock_key; - -#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ - do { \ - (udata)->inbuf = (void __user *) (ibuf); \ - (udata)->outbuf = (void __user *) (obuf); \ - (udata)->inlen = (ilen); \ - (udata)->outlen = (olen); \ - } while (0) +struct uverbs_lock_class { + struct lock_class_key key; + char name[16]; +}; + +static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; +static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" }; +static struct uverbs_lock_class mw_lock_class = { .name = "MW-uobj" }; +static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" }; +static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" }; +static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" }; +static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" }; +static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" }; +static struct uverbs_lock_class rule_lock_class = { .name = "RULE-uobj" }; /* * The ib_uobject locking scheme is as follows: @@ -83,13 +83,13 @@ static struct lock_class_key xrcd_lock_key; */ static void init_uobj(struct ib_uobject *uobj, u64 user_handle, - struct ib_ucontext *context, struct lock_class_key *key) + struct ib_ucontext *context, struct uverbs_lock_class *c) { uobj->user_handle = user_handle; uobj->context = context; kref_init(&uobj->ref); init_rwsem(&uobj->mutex); - lockdep_set_class(&uobj->mutex, key); + lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); uobj->live = 0; } @@ -119,18 +119,17 @@ static int idr_add_uobj(struct idr *idr, struct ib_uobject *uobj) { int ret; -retry: - if (!idr_pre_get(idr, GFP_KERNEL)) - return -ENOMEM; - + idr_preload(GFP_KERNEL); spin_lock(&ib_uverbs_idr_lock); - ret = idr_get_new(idr, uobj, &uobj->id); - spin_unlock(&ib_uverbs_idr_lock); - if (ret == -EAGAIN) - goto retry; + ret = idr_alloc(idr, uobj, 0, 0, GFP_NOWAIT); + if (ret >= 0) + uobj->id = ret; - return ret; + spin_unlock(&ib_uverbs_idr_lock); + idr_preload_end(); + + return ret < 0 ? ret : 0; } void idr_remove_uobj(struct idr *idr, struct ib_uobject *uobj) @@ -325,11 +324,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, INIT_LIST_HEAD(&ucontext->srq_list); INIT_LIST_HEAD(&ucontext->ah_list); INIT_LIST_HEAD(&ucontext->xrcd_list); + INIT_LIST_HEAD(&ucontext->rule_list); ucontext->closing = 0; resp.num_comp_vectors = file->device->num_comp_vectors; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) goto err_free; resp.async_fd = ret; @@ -522,7 +522,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &pd_lock_key); + init_uobj(uobj, 0, file->ucontext, &pd_lock_class); down_write(&uobj->mutex); pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, @@ -700,7 +700,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, struct ib_udata udata; struct ib_uxrcd_object *obj; struct ib_xrcd *xrcd = NULL; - struct file *f = NULL; + struct fd f = {NULL, 0}; struct inode *inode = NULL; int ret = 0; int new_xrcd = 0; @@ -719,18 +719,13 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, if (cmd.fd != -1) { /* search for file descriptor */ - f = fget(cmd.fd); - if (!f) { - ret = -EBADF; - goto err_tree_mutex_unlock; - } - - inode = f->f_dentry->d_inode; - if (!inode) { + f = fdget(cmd.fd); + if (!f.file) { ret = -EBADF; goto err_tree_mutex_unlock; } + inode = file_inode(f.file); xrcd = find_xrcd(file->device, inode); if (!xrcd && !(cmd.oflags & O_CREAT)) { /* no file descriptor. Need CREATE flag */ @@ -750,7 +745,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_tree_mutex_unlock; } - init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_key); + init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class); down_write(&obj->uobject.mutex); @@ -795,8 +790,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file, goto err_copy; } - if (f) - fput(f); + if (f.file) + fdput(f); mutex_lock(&file->mutex); list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list); @@ -825,8 +820,8 @@ err: put_uobj_write(&obj->uobject); err_tree_mutex_unlock: - if (f) - fput(f); + if (f.file) + fdput(f); mutex_unlock(&file->device->xrcd_tree_mutex); @@ -935,19 +930,15 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) return -EINVAL; - /* - * Local write permission is required if remote write or - * remote atomic permission is also requested. - */ - if (cmd.access_flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && - !(cmd.access_flags & IB_ACCESS_LOCAL_WRITE)) - return -EINVAL; + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + return ret; uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; - init_uobj(uobj, 0, file->ucontext, &mr_lock_key); + init_uobj(uobj, 0, file->ucontext, &mr_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -1049,6 +1040,126 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, return in_len; } +ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_alloc_mw cmd; + struct ib_uverbs_alloc_mw_resp resp; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_mw *mw; + int ret; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + init_uobj(uobj, 0, file->ucontext, &mw_lock_class); + down_write(&uobj->mutex); + + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_free; + } + + mw = pd->device->alloc_mw(pd, cmd.mw_type); + if (IS_ERR(mw)) { + ret = PTR_ERR(mw); + goto err_put; + } + + mw->device = pd->device; + mw->pd = pd; + mw->uobject = uobj; + atomic_inc(&pd->usecnt); + + uobj->object = mw; + ret = idr_add_uobj(&ib_uverbs_mw_idr, uobj); + if (ret) + goto err_unalloc; + + memset(&resp, 0, sizeof(resp)); + resp.rkey = mw->rkey; + resp.mw_handle = uobj->id; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) { + ret = -EFAULT; + goto err_copy; + } + + put_pd_read(pd); + + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->mw_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + + return in_len; + +err_copy: + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + +err_unalloc: + ib_dealloc_mw(mw); + +err_put: + put_pd_read(pd); + +err_free: + put_uobj_write(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dealloc_mw cmd; + struct ib_mw *mw; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + uobj = idr_write_uobj(&ib_uverbs_mw_idr, cmd.mw_handle, file->ucontext); + if (!uobj) + return -EINVAL; + + mw = uobj->object; + + ret = ib_dealloc_mw(mw); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + if (ret) + return ret; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return in_len; +} + ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) @@ -1064,7 +1175,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - ret = get_unused_fd(); + ret = get_unused_fd_flags(O_CLOEXEC); if (ret < 0) return ret; resp.fd = ret; @@ -1115,7 +1226,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key); + init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); down_write(&obj->uobject.mutex); if (cmd.comp_channel >= 0) { @@ -1399,15 +1510,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) + return -EPERM; + INIT_UDATA(&udata, buf + sizeof cmd, (unsigned long) cmd.response + sizeof resp, in_len - sizeof cmd, out_len - sizeof resp); - obj = kmalloc(sizeof *obj, GFP_KERNEL); + obj = kzalloc(sizeof *obj, GFP_KERNEL); if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); if (cmd.qp_type == IB_QPT_XRC_TGT) { @@ -1418,13 +1532,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, } device = xrcd->device; } else { - pd = idr_read_pd(cmd.pd_handle, file->ucontext); - scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0); - if (!pd || !scq) { - ret = -EINVAL; - goto err_put; - } - if (cmd.qp_type == IB_QPT_XRC_INI) { cmd.max_recv_wr = cmd.max_recv_sge = 0; } else { @@ -1435,13 +1542,24 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_put; } } - rcq = (cmd.recv_cq_handle == cmd.send_cq_handle) ? - scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1); - if (!rcq) { - ret = -EINVAL; - goto err_put; + + if (cmd.recv_cq_handle != cmd.send_cq_handle) { + rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0); + if (!rcq) { + ret = -EINVAL; + goto err_put; + } } } + + scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq); + rcq = rcq ?: scq; + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd || !scq) { + ret = -EINVAL; + goto err_put; + } + device = pd->device; } @@ -1515,8 +1633,13 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, goto err_copy; } - if (xrcd) + if (xrcd) { + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, + uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); + } + if (pd) put_pd_read(pd); if (scq) @@ -1585,7 +1708,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key); + init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); down_write(&obj->uevent.uobject.mutex); xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); @@ -1626,6 +1749,8 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file, goto err_remove; } + obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); + atomic_inc(&obj->uxrcd->refcnt); put_xrcd_read(xrcd_uobj); mutex_lock(&file->mutex); @@ -1837,6 +1962,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { + ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask); + if (ret) + goto out; ret = qp->device->modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); } else { @@ -1892,6 +2020,9 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, if (ret) return ret; + if (obj->uxrcd) + atomic_dec(&obj->uxrcd->refcnt); + idr_remove_uobj(&ib_uverbs_qp_idr, uobj); mutex_lock(&file->mutex); @@ -1987,6 +2118,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file, } next->wr.ud.remote_qpn = user_wr->wr.ud.remote_qpn; next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey; + if (next->opcode == IB_WR_SEND_WITH_IMM) + next->ex.imm_data = + (__be32 __force) user_wr->ex.imm_data; } else { switch (next->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: @@ -2272,7 +2406,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (!uobj) return -ENOMEM; - init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key); + init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); down_write(&uobj->mutex); pd = idr_read_pd(cmd.pd_handle, file->ucontext); @@ -2460,6 +2594,251 @@ out_put: return ret ? ret : in_len; } +static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, + union ib_flow_spec *ib_spec) +{ + if (kern_spec->reserved) + return -EINVAL; + + ib_spec->type = kern_spec->type; + + switch (ib_spec->type) { + case IB_FLOW_SPEC_ETH: + ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); + if (ib_spec->eth.size != kern_spec->eth.size) + return -EINVAL; + memcpy(&ib_spec->eth.val, &kern_spec->eth.val, + sizeof(struct ib_flow_eth_filter)); + memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, + sizeof(struct ib_flow_eth_filter)); + break; + case IB_FLOW_SPEC_IPV4: + ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); + if (ib_spec->ipv4.size != kern_spec->ipv4.size) + return -EINVAL; + memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, + sizeof(struct ib_flow_ipv4_filter)); + memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, + sizeof(struct ib_flow_ipv4_filter)); + break; + case IB_FLOW_SPEC_TCP: + case IB_FLOW_SPEC_UDP: + ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); + if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + return -EINVAL; + memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, + sizeof(struct ib_flow_tcp_udp_filter)); + memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, + sizeof(struct ib_flow_tcp_udp_filter)); + break; + default: + return -EINVAL; + } + return 0; +} + +int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_create_flow cmd; + struct ib_uverbs_create_flow_resp resp; + struct ib_uobject *uobj; + struct ib_flow *flow_id; + struct ib_uverbs_flow_attr *kern_flow_attr; + struct ib_flow_attr *flow_attr; + struct ib_qp *qp; + int err = 0; + void *kern_spec; + void *ib_spec; + int i; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + if (ucore->outlen < sizeof(resp)) + return -ENOSPC; + + err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (err) + return err; + + ucore->inbuf += sizeof(cmd); + ucore->inlen -= sizeof(cmd); + + if (cmd.comp_mask) + return -EINVAL; + + if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER && + !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW)) + return -EPERM; + + if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS) + return -EINVAL; + + if (cmd.flow_attr.size > ucore->inlen || + cmd.flow_attr.size > + (cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec))) + return -EINVAL; + + if (cmd.flow_attr.reserved[0] || + cmd.flow_attr.reserved[1]) + return -EINVAL; + + if (cmd.flow_attr.num_of_specs) { + kern_flow_attr = kmalloc(sizeof(*kern_flow_attr) + cmd.flow_attr.size, + GFP_KERNEL); + if (!kern_flow_attr) + return -ENOMEM; + + memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr)); + err = ib_copy_from_udata(kern_flow_attr + 1, ucore, + cmd.flow_attr.size); + if (err) + goto err_free_attr; + } else { + kern_flow_attr = &cmd.flow_attr; + } + + uobj = kmalloc(sizeof(*uobj), GFP_KERNEL); + if (!uobj) { + err = -ENOMEM; + goto err_free_attr; + } + init_uobj(uobj, 0, file->ucontext, &rule_lock_class); + down_write(&uobj->mutex); + + qp = idr_read_qp(cmd.qp_handle, file->ucontext); + if (!qp) { + err = -EINVAL; + goto err_uobj; + } + + flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL); + if (!flow_attr) { + err = -ENOMEM; + goto err_put; + } + + flow_attr->type = kern_flow_attr->type; + flow_attr->priority = kern_flow_attr->priority; + flow_attr->num_of_specs = kern_flow_attr->num_of_specs; + flow_attr->port = kern_flow_attr->port; + flow_attr->flags = kern_flow_attr->flags; + flow_attr->size = sizeof(*flow_attr); + + kern_spec = kern_flow_attr + 1; + ib_spec = flow_attr + 1; + for (i = 0; i < flow_attr->num_of_specs && + cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) && + cmd.flow_attr.size >= + ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) { + err = kern_spec_to_ib_spec(kern_spec, ib_spec); + if (err) + goto err_free; + flow_attr->size += + ((union ib_flow_spec *) ib_spec)->size; + cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size; + kern_spec += ((struct ib_uverbs_flow_spec *) kern_spec)->size; + ib_spec += ((union ib_flow_spec *) ib_spec)->size; + } + if (cmd.flow_attr.size || (i != flow_attr->num_of_specs)) { + pr_warn("create flow failed, flow %d: %d bytes left from uverb cmd\n", + i, cmd.flow_attr.size); + err = -EINVAL; + goto err_free; + } + flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER); + if (IS_ERR(flow_id)) { + err = PTR_ERR(flow_id); + goto err_free; + } + flow_id->qp = qp; + flow_id->uobject = uobj; + uobj->object = flow_id; + + err = idr_add_uobj(&ib_uverbs_rule_idr, uobj); + if (err) + goto destroy_flow; + + memset(&resp, 0, sizeof(resp)); + resp.flow_handle = uobj->id; + + err = ib_copy_to_udata(ucore, + &resp, sizeof(resp)); + if (err) + goto err_copy; + + put_qp_read(qp); + mutex_lock(&file->mutex); + list_add_tail(&uobj->list, &file->ucontext->rule_list); + mutex_unlock(&file->mutex); + + uobj->live = 1; + + up_write(&uobj->mutex); + kfree(flow_attr); + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return 0; +err_copy: + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); +destroy_flow: + ib_destroy_flow(flow_id); +err_free: + kfree(flow_attr); +err_put: + put_qp_read(qp); +err_uobj: + put_uobj_write(uobj); +err_free_attr: + if (cmd.flow_attr.num_of_specs) + kfree(kern_flow_attr); + return err; +} + +int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_destroy_flow cmd; + struct ib_flow *flow_id; + struct ib_uobject *uobj; + int ret; + + if (ucore->inlen < sizeof(cmd)) + return -EINVAL; + + ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd)); + if (ret) + return ret; + + if (cmd.comp_mask) + return -EINVAL; + + uobj = idr_write_uobj(&ib_uverbs_rule_idr, cmd.flow_handle, + file->ucontext); + if (!uobj) + return -EINVAL; + flow_id = uobj->object; + + ret = ib_destroy_flow(flow_id); + if (!ret) + uobj->live = 0; + + put_uobj_write(uobj); + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + + mutex_lock(&file->mutex); + list_del(&uobj->list); + mutex_unlock(&file->mutex); + + put_uobj(uobj); + + return ret; +} + static int __uverbs_create_xsrq(struct ib_uverbs_file *file, struct ib_uverbs_create_xsrq *cmd, struct ib_udata *udata) @@ -2476,30 +2855,30 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file, if (!obj) return -ENOMEM; - init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_key); + init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); down_write(&obj->uevent.uobject.mutex); - pd = idr_read_pd(cmd->pd_handle, file->ucontext); - if (!pd) { - ret = -EINVAL; - goto err; - } - if (cmd->srq_type == IB_SRQT_XRC) { - attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); - if (!attr.ext.xrc.cq) { - ret = -EINVAL; - goto err_put_pd; - } - attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); if (!attr.ext.xrc.xrcd) { ret = -EINVAL; - goto err_put_cq; + goto err; } obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); atomic_inc(&obj->uxrcd->refcnt); + + attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); + if (!attr.ext.xrc.cq) { + ret = -EINVAL; + goto err_put_xrcd; + } + } + + pd = idr_read_pd(cmd->pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto err_put_cq; } attr.event_handler = ib_uverbs_srq_event_handler; @@ -2576,17 +2955,17 @@ err_destroy: ib_destroy_srq(srq); err_put: - if (cmd->srq_type == IB_SRQT_XRC) { - atomic_dec(&obj->uxrcd->refcnt); - put_uobj_read(xrcd_uobj); - } + put_pd_read(pd); err_put_cq: if (cmd->srq_type == IB_SRQT_XRC) put_cq_read(attr.ext.xrc.cq); -err_put_pd: - put_pd_read(pd); +err_put_xrcd: + if (cmd->srq_type == IB_SRQT_XRC) { + atomic_dec(&obj->uxrcd->refcnt); + put_uobj_read(xrcd_uobj); + } err: put_uobj_write(&obj->uevent.uobject); @@ -2733,6 +3112,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, struct ib_srq *srq; struct ib_uevent_object *obj; int ret = -EINVAL; + struct ib_usrq_object *us; + enum ib_srq_type srq_type; if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; @@ -2742,6 +3123,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, return -EINVAL; srq = uobj->object; obj = container_of(uobj, struct ib_uevent_object, uobject); + srq_type = srq->srq_type; ret = ib_destroy_srq(srq); if (!ret) @@ -2752,6 +3134,11 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file, if (ret) return ret; + if (srq_type == IB_SRQT_XRC) { + us = container_of(obj, struct ib_usrq_object, uevent); + atomic_dec(&us->uxrcd->refcnt); + } + idr_remove_uobj(&ib_uverbs_srq_idr, uobj); mutex_lock(&file->mutex); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 604556d73d2..08219fb3338 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -73,6 +73,7 @@ DEFINE_IDR(ib_uverbs_cq_idr); DEFINE_IDR(ib_uverbs_qp_idr); DEFINE_IDR(ib_uverbs_srq_idr); DEFINE_IDR(ib_uverbs_xrcd_idr); +DEFINE_IDR(ib_uverbs_rule_idr); static DEFINE_SPINLOCK(map_lock); static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); @@ -87,6 +88,8 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, + [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq, @@ -111,7 +114,14 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd, [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd, [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq, - [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp + [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp, +}; + +static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, + struct ib_udata *ucore, + struct ib_udata *uhw) = { + [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow, + [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow }; static void ib_uverbs_add_one(struct ib_device *device); @@ -201,6 +211,23 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uobj); } + /* Remove MWs before QPs, in order to support type 2A MWs. */ + list_for_each_entry_safe(uobj, tmp, &context->mw_list, list) { + struct ib_mw *mw = uobj->object; + + idr_remove_uobj(&ib_uverbs_mw_idr, uobj); + ib_dealloc_mw(mw); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->rule_list, list) { + struct ib_flow *flow_id = uobj->object; + + idr_remove_uobj(&ib_uverbs_rule_idr, uobj); + ib_destroy_flow(flow_id); + kfree(uobj); + } + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { struct ib_qp *qp = uobj->object; struct ib_uqp_object *uqp = @@ -240,8 +267,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file, kfree(uevent); } - /* XXX Free MWs */ - list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { struct ib_mr *mr = uobj->object; @@ -541,16 +566,15 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file, struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) { struct ib_uverbs_event_file *ev_file = NULL; - struct file *filp; + struct fd f = fdget(fd); - filp = fget(fd); - if (!filp) + if (!f.file) return NULL; - if (filp->f_op != &uverbs_event_fops) + if (f.file->f_op != &uverbs_event_fops) goto out; - ev_file = filp->private_data; + ev_file = f.file->private_data; if (ev_file->is_async) { ev_file = NULL; goto out; @@ -559,7 +583,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) kref_get(&ev_file->ref); out: - fput(filp); + fdput(f); return ev_file; } @@ -568,6 +592,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, { struct ib_uverbs_file *file = filp->private_data; struct ib_uverbs_cmd_hdr hdr; + __u32 flags; if (count < sizeof hdr) return -EINVAL; @@ -575,22 +600,110 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (copy_from_user(&hdr, buf, sizeof hdr)) return -EFAULT; - if (hdr.in_words * 4 != count) - return -EINVAL; + flags = (hdr.command & + IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT; - if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || - !uverbs_cmd_table[hdr.command]) - return -EINVAL; + if (!flags) { + __u32 command; - if (!file->ucontext && - hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) - return -EINVAL; + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return -EINVAL; + + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + + if (command >= ARRAY_SIZE(uverbs_cmd_table) || + !uverbs_cmd_table[command]) + return -EINVAL; + + if (!file->ucontext && + command != IB_USER_VERBS_CMD_GET_CONTEXT) + return -EINVAL; + + if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << command))) + return -ENOSYS; + + if (hdr.in_words * 4 != count) + return -EINVAL; + + return uverbs_cmd_table[command](file, + buf + sizeof(hdr), + hdr.in_words * 4, + hdr.out_words * 4); - if (!(file->device->ib_dev->uverbs_cmd_mask & (1ull << hdr.command))) - return -ENOSYS; + } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) { + __u32 command; + + struct ib_uverbs_ex_cmd_hdr ex_hdr; + struct ib_udata ucore; + struct ib_udata uhw; + int err; + size_t written_count = count; + + if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK | + IB_USER_VERBS_CMD_COMMAND_MASK)) + return -EINVAL; + + command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK; + + if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) || + !uverbs_ex_cmd_table[command]) + return -ENOSYS; + + if (!file->ucontext) + return -EINVAL; + + if (!(file->device->ib_dev->uverbs_ex_cmd_mask & (1ull << command))) + return -ENOSYS; + + if (count < (sizeof(hdr) + sizeof(ex_hdr))) + return -EINVAL; + + if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) + return -EFAULT; + + count -= sizeof(hdr) + sizeof(ex_hdr); + buf += sizeof(hdr) + sizeof(ex_hdr); + + if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) + return -EINVAL; + + if (ex_hdr.cmd_hdr_reserved) + return -EINVAL; + + if (ex_hdr.response) { + if (!hdr.out_words && !ex_hdr.provider_out_words) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, + (void __user *) (unsigned long) ex_hdr.response, + (hdr.out_words + ex_hdr.provider_out_words) * 8)) + return -EFAULT; + } else { + if (hdr.out_words || ex_hdr.provider_out_words) + return -EINVAL; + } + + INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response, + hdr.in_words * 8, hdr.out_words * 8); + + INIT_UDATA_BUF_OR_NULL(&uhw, + buf + ucore.inlen, + (unsigned long) ex_hdr.response + ucore.outlen, + ex_hdr.provider_in_words * 8, + ex_hdr.provider_out_words * 8); + + err = uverbs_ex_cmd_table[command](file, + &ucore, + &uhw); + + if (err) + return err; + + return written_count; + } - return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, - hdr.in_words * 4, hdr.out_words * 4); + return -ENOSYS; } static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 575b78045aa..c2b89cc5dbc 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -44,8 +44,11 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_cache.h> +#include <rdma/ib_addr.h> -int ib_rate_to_mult(enum ib_rate rate) +#include "core_priv.h" + +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 1; @@ -62,7 +65,7 @@ int ib_rate_to_mult(enum ib_rate rate) } EXPORT_SYMBOL(ib_rate_to_mult); -enum ib_rate mult_to_ib_rate(int mult) +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult) { switch (mult) { case 1: return IB_RATE_2_5_GBPS; @@ -79,7 +82,7 @@ enum ib_rate mult_to_ib_rate(int mult) } EXPORT_SYMBOL(mult_to_ib_rate); -int ib_rate_to_mbps(enum ib_rate rate) +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate) { switch (rate) { case IB_RATE_2_5_GBPS: return 2500; @@ -104,7 +107,7 @@ int ib_rate_to_mbps(enum ib_rate rate) } EXPORT_SYMBOL(ib_rate_to_mbps); -enum rdma_transport_type +__attribute_const__ enum rdma_transport_type rdma_node_get_transport(enum rdma_node_type node_type) { switch (node_type) { @@ -114,6 +117,10 @@ rdma_node_get_transport(enum rdma_node_type node_type) return RDMA_TRANSPORT_IB; case RDMA_NODE_RNIC: return RDMA_TRANSPORT_IWARP; + case RDMA_NODE_USNIC: + return RDMA_TRANSPORT_USNIC; + case RDMA_NODE_USNIC_UDP: + return RDMA_TRANSPORT_USNIC_UDP; default: BUG(); return 0; @@ -130,6 +137,8 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_ case RDMA_TRANSPORT_IB: return IB_LINK_LAYER_INFINIBAND; case RDMA_TRANSPORT_IWARP: + case RDMA_TRANSPORT_USNIC: + case RDMA_TRANSPORT_USNIC_UDP: return IB_LINK_LAYER_ETHERNET; default: return IB_LINK_LAYER_UNSPECIFIED; @@ -189,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc, u32 flow_class; u16 gid_index; int ret; + int is_eth = (rdma_port_get_link_layer(device, port_num) == + IB_LINK_LAYER_ETHERNET); memset(ah_attr, 0, sizeof *ah_attr); + if (is_eth) { + if (!(wc->wc_flags & IB_WC_GRH)) + return -EPROTOTYPE; + + if (wc->wc_flags & IB_WC_WITH_SMAC && + wc->wc_flags & IB_WC_WITH_VLAN) { + memcpy(ah_attr->dmac, wc->smac, ETH_ALEN); + ah_attr->vlan_id = wc->vlan_id; + } else { + ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid, + ah_attr->dmac, &ah_attr->vlan_id); + if (ret) + return ret; + } + } else { + ah_attr->vlan_id = 0xffff; + } + ah_attr->dlid = wc->slid; ah_attr->sl = wc->sl; ah_attr->src_path_bits = wc->dlid_path_bits; @@ -346,9 +375,13 @@ EXPORT_SYMBOL(ib_destroy_srq); static void __ib_shared_qp_event_handler(struct ib_event *event, void *context) { struct ib_qp *qp = context; + unsigned long flags; + spin_lock_irqsave(&qp->device->event_handler_lock, flags); list_for_each_entry(event->element.qp, &qp->open_list, open_list) - event->element.qp->event_handler(event, event->element.qp->qp_context); + if (event->element.qp->event_handler) + event->element.qp->event_handler(event, event->element.qp->qp_context); + spin_unlock_irqrestore(&qp->device->event_handler_lock, flags); } static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp) @@ -469,7 +502,9 @@ EXPORT_SYMBOL(ib_create_qp); static const struct { int valid; enum ib_qp_attr_mask req_param[IB_QPT_MAX]; + enum ib_qp_attr_mask req_param_add_eth[IB_QPT_MAX]; enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; + enum ib_qp_attr_mask opt_param_add_eth[IB_QPT_MAX]; } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { [IB_QPS_RESET] = { [IB_QPS_RESET] = { .valid = 1 }, @@ -479,6 +514,7 @@ static const struct { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_PORT, [IB_QPT_UC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -549,6 +585,12 @@ static const struct { IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER), }, + .req_param_add_eth = { + [IB_QPT_RC] = (IB_QP_SMAC), + [IB_QPT_UC] = (IB_QP_SMAC), + [IB_QPT_XRC_INI] = (IB_QP_SMAC), + [IB_QPT_XRC_TGT] = (IB_QP_SMAC) + }, .opt_param = { [IB_QPT_UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), @@ -568,7 +610,21 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), - } + }, + .opt_param_add_eth = { + [IB_QPT_RC] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_UC] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_XRC_INI] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID), + [IB_QPT_XRC_TGT] = (IB_QP_ALT_SMAC | + IB_QP_VID | + IB_QP_ALT_VID) + } } }, [IB_QPS_RTR] = { @@ -771,7 +827,8 @@ static const struct { }; int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask) + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll) { enum ib_qp_attr_mask req_param, opt_param; @@ -790,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, req_param = qp_state_table[cur_state][next_state].req_param[type]; opt_param = qp_state_table[cur_state][next_state].opt_param[type]; + if (ll == IB_LINK_LAYER_ETHERNET) { + req_param |= qp_state_table[cur_state][next_state]. + req_param_add_eth[type]; + opt_param |= qp_state_table[cur_state][next_state]. + opt_param_add_eth[type]; + } + if ((mask & req_param) != req_param) return 0; @@ -800,10 +864,51 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); +int ib_resolve_eth_l2_attrs(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, int *qp_attr_mask) +{ + int ret = 0; + union ib_gid sgid; + + if ((*qp_attr_mask & IB_QP_AV) && + (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) { + ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num, + qp_attr->ah_attr.grh.sgid_index, &sgid); + if (ret) + goto out; + if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { + rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac); + rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac); + qp_attr->vlan_id = rdma_get_vlan_id(&sgid); + } else { + ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid, + qp_attr->ah_attr.dmac, &qp_attr->vlan_id); + if (ret) + goto out; + ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL); + if (ret) + goto out; + } + *qp_attr_mask |= IB_QP_SMAC; + if (qp_attr->vlan_id < 0xFFFF) + *qp_attr_mask |= IB_QP_VID; + } +out: + return ret; +} +EXPORT_SYMBOL(ib_resolve_eth_l2_attrs); + + int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { + int ret; + + ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask); + if (ret) + return ret; + return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } EXPORT_SYMBOL(ib_modify_qp); @@ -953,6 +1058,11 @@ EXPORT_SYMBOL(ib_resize_cq); struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) { struct ib_mr *mr; + int err; + + err = ib_check_mr_access(mr_access_flags); + if (err) + return ERR_PTR(err); mr = pd->device->get_dma_mr(pd, mr_access_flags); @@ -975,6 +1085,11 @@ struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, u64 *iova_start) { struct ib_mr *mr; + int err; + + err = ib_check_mr_access(mr_access_flags); + if (err) + return ERR_PTR(err); if (!pd->device->reg_phys_mr) return ERR_PTR(-ENOSYS); @@ -1005,6 +1120,10 @@ int ib_rereg_phys_mr(struct ib_mr *mr, struct ib_pd *old_pd; int ret; + ret = ib_check_mr_access(mr_access_flags); + if (ret) + return ret; + if (!mr->device->rereg_phys_mr) return -ENOSYS; @@ -1050,6 +1169,45 @@ int ib_dereg_mr(struct ib_mr *mr) } EXPORT_SYMBOL(ib_dereg_mr); +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct ib_mr *mr; + + if (!pd->device->create_mr) + return ERR_PTR(-ENOSYS); + + mr = pd->device->create_mr(pd, mr_init_attr); + + if (!IS_ERR(mr)) { + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + } + + return mr; +} +EXPORT_SYMBOL(ib_create_mr); + +int ib_destroy_mr(struct ib_mr *mr) +{ + struct ib_pd *pd; + int ret; + + if (atomic_read(&mr->usecnt)) + return -EBUSY; + + pd = mr->pd; + ret = mr->device->destroy_mr(mr); + if (!ret) + atomic_dec(&pd->usecnt); + + return ret; +} +EXPORT_SYMBOL(ib_destroy_mr); + struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { struct ib_mr *mr; @@ -1098,18 +1256,19 @@ EXPORT_SYMBOL(ib_free_fast_reg_page_list); /* Memory windows */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd) +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct ib_mw *mw; if (!pd->device->alloc_mw) return ERR_PTR(-ENOSYS); - mw = pd->device->alloc_mw(pd); + mw = pd->device->alloc_mw(pd, type); if (!IS_ERR(mw)) { mw->device = pd->device; mw->pd = pd; mw->uobject = NULL; + mw->type = type; atomic_inc(&pd->usecnt); } @@ -1183,23 +1342,33 @@ EXPORT_SYMBOL(ib_dealloc_fmr); int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->attach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; - return qp->device->attach_mcast(qp, gid, lid); + ret = qp->device->attach_mcast(qp, gid, lid); + if (!ret) + atomic_inc(&qp->usecnt); + return ret; } EXPORT_SYMBOL(ib_attach_mcast); int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) { + int ret; + if (!qp->device->detach_mcast) return -ENOSYS; if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) return -EINVAL; - return qp->device->detach_mcast(qp, gid, lid); + ret = qp->device->detach_mcast(qp, gid, lid); + if (!ret) + atomic_dec(&qp->usecnt); + return ret; } EXPORT_SYMBOL(ib_detach_mcast); @@ -1241,3 +1410,38 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd) return xrcd->device->dealloc_xrcd(xrcd); } EXPORT_SYMBOL(ib_dealloc_xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, + int domain) +{ + struct ib_flow *flow_id; + if (!qp->device->create_flow) + return ERR_PTR(-ENOSYS); + + flow_id = qp->device->create_flow(qp, flow_attr, domain); + if (!IS_ERR(flow_id)) + atomic_inc(&qp->usecnt); + return flow_id; +} +EXPORT_SYMBOL(ib_create_flow); + +int ib_destroy_flow(struct ib_flow *flow_id) +{ + int err; + struct ib_qp *qp = flow_id->qp; + + err = qp->device->destroy_flow(flow_id); + if (!err) + atomic_dec(&qp->usecnt); + return err; +} +EXPORT_SYMBOL(ib_destroy_flow); + +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + return mr->device->check_mr_status ? + mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS; +} +EXPORT_SYMBOL(ib_check_mr_status); |
