aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/core/addr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/addr.c')
-rw-r--r--drivers/infiniband/core/addr.c442
1 files changed, 309 insertions, 133 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 781ea595037..8172d37f9ad 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -4,39 +4,48 @@
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
*
- * This Software is licensed under one of the following licenses:
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
*
- * 1) under the terms of the "Common Public License 1.0" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/cpl.php.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
*
- * 2) under the terms of the "The BSD License" a copy of which is
- * available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/bsd-license.php.
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
*
- * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
- * copy of which is available from the Open Source Initiative, see
- * http://www.opensource.org/licenses/gpl-license.php.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
*
- * Licensee has the right to choose one of the above licenses.
- *
- * Redistributions of source code must retain the above copyright
- * notice and one of the license notices.
- *
- * Redistributions in binary form must reproduce both the above copyright
- * notice, one of the license notices in the documentation
- * and/or other materials provided with the distribution.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
*/
#include <linux/mutex.h>
#include <linux/inetdevice.h>
+#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/if_arp.h>
+#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
#include <net/netevent.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
@@ -44,8 +53,8 @@ MODULE_LICENSE("Dual BSD/GPL");
struct addr_req {
struct list_head list;
- struct sockaddr src_addr;
- struct sockaddr dst_addr;
+ struct sockaddr_storage src_addr;
+ struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
struct rdma_addr_client *client;
void *context;
@@ -62,6 +71,23 @@ static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
+int rdma_addr_size(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_IB:
+ return sizeof(struct sockaddr_ib);
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
+static struct rdma_addr_client self;
+
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@@ -85,37 +111,62 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
- switch (dev->type) {
- case ARPHRD_INFINIBAND:
- dev_addr->dev_type = RDMA_NODE_IB_CA;
- break;
- case ARPHRD_ETHER:
- dev_addr->dev_type = RDMA_NODE_RNIC;
- break;
- default:
- return -EADDRNOTAVAIL;
- }
-
+ dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
+ dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id)
{
struct net_device *dev;
- __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
- int ret;
+ int ret = -EADDRNOTAVAIL;
+
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
- dev = ip_dev_find(&init_net, ip);
- if (!dev)
- return -EADDRNOTAVAIL;
+ switch (addr->sa_family) {
+ case AF_INET:
+ dev = ip_dev_find(&init_net,
+ ((struct sockaddr_in *) addr)->sin_addr.s_addr);
- ret = rdma_copy_addr(dev_addr, dev, NULL);
- dev_put(dev);
+ if (!dev)
+ return ret;
+
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
+ if (ipv6_chk_addr(&init_net,
+ &((struct sockaddr_in6 *) addr)->sin6_addr,
+ dev, 1)) {
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ break;
+ }
+ }
+ rcu_read_unlock();
+ break;
+#endif
+ }
return ret;
}
EXPORT_SYMBOL(rdma_translate_ip);
@@ -124,13 +175,11 @@ static void set_timeout(unsigned long time)
{
unsigned long delay;
- cancel_delayed_work(&work);
-
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
- queue_delayed_work(addr_wq, &work, delay);
+ mod_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
@@ -150,74 +199,142 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static void addr_send_arp(struct sockaddr_in *dst_in)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
{
- struct rtable *rt;
- struct flowi fl;
- __be32 dst_ip = dst_in->sin_addr.s_addr;
+ struct neighbour *n;
+ int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- if (ip_route_output_key(&init_net, &rt, &fl))
- return;
+ n = dst_neigh_lookup(dst, daddr);
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ip_rt_put(rt);
+ rcu_read_lock();
+ if (!n || !(n->nud_state & NUD_VALID)) {
+ if (n)
+ neigh_event_send(n, NULL);
+ ret = -ENODATA;
+ } else {
+ ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ }
+ rcu_read_unlock();
+
+ if (n)
+ neigh_release(n);
+
+ return ret;
}
-static int addr_resolve_remote(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
- struct flowi fl;
struct rtable *rt;
- struct neighbour *neigh;
+ struct flowi4 fl4;
int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- fl.nl_u.ip4_u.saddr = src_ip;
- ret = ip_route_output_key(&init_net, &rt, &fl);
- if (ret)
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = dst_ip;
+ fl4.saddr = src_ip;
+ fl4.flowi4_oif = addr->bound_dev_if;
+ rt = ip_route_output_key(&init_net, &fl4);
+ if (IS_ERR(rt)) {
+ ret = PTR_ERR(rt);
goto out;
+ }
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = fl4.saddr;
+
+ if (rt->dst.dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
/* If the device does ARP internally, return 'done' */
- if (rt->idev->dev->flags & IFF_NOARP) {
- rdma_copy_addr(addr, rt->idev->dev, NULL);
+ if (rt->dst.dev->flags & IFF_NOARP) {
+ ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
goto put;
}
- neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh) {
- ret = -ENODATA;
+ ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
+put:
+ ip_rt_put(rt);
+out:
+ return ret;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ struct flowi6 fl6;
+ struct dst_entry *dst;
+ int ret;
+
+ memset(&fl6, 0, sizeof fl6);
+ fl6.daddr = dst_in->sin6_addr;
+ fl6.saddr = src_in->sin6_addr;
+ fl6.flowi6_oif = addr->bound_dev_if;
+
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ if ((ret = dst->error))
goto put;
+
+ if (ipv6_addr_any(&fl6.saddr)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl6.daddr, 0, &fl6.saddr);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ src_in->sin6_addr = fl6.saddr;
}
- if (!(neigh->nud_state & NUD_VALID)) {
- ret = -ENODATA;
- goto release;
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
}
- if (!src_ip) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = rt->rt_src;
+ /* If the device does ARP internally, return 'done' */
+ if (dst->dev->flags & IFF_NOARP) {
+ ret = rdma_copy_addr(addr, dst->dev, NULL);
+ goto put;
}
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
-release:
- neigh_release(neigh);
+ ret = dst_fetch_ha(dst, addr, &fl6.daddr);
put:
- ip_rt_put(rt);
-out:
+ dst_release(dst);
return ret;
}
+#else
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ return -EADDRNOTAVAIL;
+}
+#endif
+
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ if (src_in->sa_family == AF_INET) {
+ return addr4_resolve((struct sockaddr_in *) src_in,
+ (struct sockaddr_in *) dst_in, addr);
+ } else
+ return addr6_resolve((struct sockaddr_in6 *) src_in,
+ (struct sockaddr_in6 *) dst_in, addr);
+}
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
- struct sockaddr_in *src_in, *dst_in;
+ struct sockaddr *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
@@ -225,10 +342,9 @@ static void process_req(struct work_struct *work)
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status == -ENODATA) {
- src_in = (struct sockaddr_in *) &req->src_addr;
- dst_in = (struct sockaddr_in *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+ req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -245,44 +361,13 @@ static void process_req(struct work_struct *work)
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
- req->callback(req->status, &req->src_addr, req->addr,
- req->context);
+ req->callback(req->status, (struct sockaddr *) &req->src_addr,
+ req->addr, req->context);
put_client(req->client);
kfree(req);
}
}
-static int addr_resolve_local(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct net_device *dev;
- __be32 src_ip = src_in->sin_addr.s_addr;
- __be32 dst_ip = dst_in->sin_addr.s_addr;
- int ret;
-
- dev = ip_dev_find(&init_net, dst_ip);
- if (!dev)
- return -EADDRNOTAVAIL;
-
- if (ipv4_is_zeronet(src_ip)) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = dst_ip;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv4_is_loopback(src_ip)) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
-
- dev_put(dev);
- return ret;
-}
-
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -290,7 +375,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
struct rdma_dev_addr *addr, void *context),
void *context)
{
- struct sockaddr_in *src_in, *dst_in;
+ struct sockaddr *src_in, *dst_in;
struct addr_req *req;
int ret = 0;
@@ -298,22 +383,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
- if (src_addr)
- memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
- memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr_in *) &req->src_addr;
- dst_in = (struct sockaddr_in *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+ req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -322,15 +413,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
- kfree(req);
- break;
+ goto err;
}
return ret;
+err:
+ kfree(req);
+ return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
@@ -352,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
+struct resolve_cb_context {
+ struct rdma_dev_addr *addr;
+ struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context)
+{
+ memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
+ rdma_dev_addr));
+ complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+ u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ struct resolve_cb_context ctx;
+ struct net_device *dev;
+
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+
+ ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ if (ret)
+ return ret;
+
+ ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ if (ret)
+ return ret;
+
+ memset(&dev_addr, 0, sizeof(dev_addr));
+
+ ctx.addr = &dev_addr;
+ init_completion(&ctx.comp);
+ ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+ &dev_addr, 1000, resolve_cb, &ctx);
+ if (ret)
+ return ret;
+
+ wait_for_completion(&ctx.comp);
+
+ memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+ dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } gid_addr;
+
+ ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+ if (ret)
+ return ret;
+ memset(&dev_addr, 0, sizeof(dev_addr));
+ ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+ if (ret)
+ return ret;
+
+ memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
@@ -369,18 +543,20 @@ static struct notifier_block nb = {
.notifier_call = netevent_callback
};
-static int addr_init(void)
+static int __init addr_init(void)
{
addr_wq = create_singlethread_workqueue("ib_addr");
if (!addr_wq)
return -ENOMEM;
register_netevent_notifier(&nb);
+ rdma_addr_register_client(&self);
return 0;
}
-static void addr_cleanup(void)
+static void __exit addr_cleanup(void)
{
+ rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}