aboutsummaryrefslogtreecommitdiff
path: root/drivers/infiniband/core/addr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/addr.c')
-rw-r--r--drivers/infiniband/core/addr.c423
1 files changed, 227 insertions, 196 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 5be1bd4fc7e..8172d37f9ad 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -35,8 +35,9 @@
#include <linux/mutex.h>
#include <linux/inetdevice.h>
+#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/if_arp.h>
+#include <linux/module.h>
#include <net/arp.h>
#include <net/neighbour.h>
#include <net/route.h>
@@ -44,6 +45,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib.h>
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("IB Address Translation");
@@ -69,6 +71,23 @@ static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
+int rdma_addr_size(struct sockaddr *addr)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_IB:
+ return sizeof(struct sockaddr_ib);
+ default:
+ return 0;
+ }
+}
+EXPORT_SYMBOL(rdma_addr_size);
+
+static struct rdma_addr_client self;
+
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
@@ -92,31 +111,31 @@ EXPORT_SYMBOL(rdma_addr_unregister_client);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
const unsigned char *dst_dev_addr)
{
- switch (dev->type) {
- case ARPHRD_INFINIBAND:
- dev_addr->dev_type = RDMA_NODE_IB_CA;
- break;
- case ARPHRD_ETHER:
- dev_addr->dev_type = RDMA_NODE_RNIC;
- break;
- default:
- return -EADDRNOTAVAIL;
- }
-
+ dev_addr->dev_type = dev->type;
memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
if (dst_dev_addr)
memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
- dev_addr->src_dev = dev;
+ dev_addr->bound_dev_if = dev->ifindex;
return 0;
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+ u16 *vlan_id)
{
struct net_device *dev;
int ret = -EADDRNOTAVAIL;
+ if (dev_addr->bound_dev_if) {
+ dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ ret = rdma_copy_addr(dev_addr, dev, NULL);
+ dev_put(dev);
+ return ret;
+ }
+
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(&init_net,
@@ -126,19 +145,25 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
return ret;
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
dev_put(dev);
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- for_each_netdev(&init_net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(&init_net, dev) {
if (ipv6_chk_addr(&init_net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
break;
}
}
+ rcu_read_unlock();
break;
#endif
}
@@ -150,13 +175,11 @@ static void set_timeout(unsigned long time)
{
unsigned long delay;
- cancel_delayed_work(&work);
-
delay = time - jiffies;
if ((long)delay <= 0)
delay = 1;
- queue_delayed_work(addr_wq, &work, delay);
+ mod_delayed_work(addr_wq, &work, delay);
}
static void queue_req(struct addr_req *req)
@@ -176,141 +199,135 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static void addr_send_arp(struct sockaddr *dst_in)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
{
- struct rtable *rt;
- struct flowi fl;
-
- memset(&fl, 0, sizeof fl);
-
- switch (dst_in->sa_family) {
- case AF_INET:
- fl.nl_u.ip4_u.daddr =
- ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- if (ip_route_output_key(&init_net, &rt, &fl))
- return;
-
- neigh_event_send(rt->u.dst.neighbour, NULL);
- ip_rt_put(rt);
- break;
+ struct neighbour *n;
+ int ret;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct dst_entry *dst;
+ n = dst_neigh_lookup(dst, daddr);
- fl.nl_u.ip6_u.daddr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
+ rcu_read_lock();
+ if (!n || !(n->nud_state & NUD_VALID)) {
+ if (n)
+ neigh_event_send(n, NULL);
+ ret = -ENODATA;
+ } else {
+ ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
+ }
+ rcu_read_unlock();
- dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return;
+ if (n)
+ neigh_release(n);
- neigh_event_send(dst->neighbour, NULL);
- dst_release(dst);
- break;
- }
-#endif
- }
+ return ret;
}
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+ struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
- struct flowi fl;
struct rtable *rt;
- struct neighbour *neigh;
+ struct flowi4 fl4;
int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip4_u.daddr = dst_ip;
- fl.nl_u.ip4_u.saddr = src_ip;
- ret = ip_route_output_key(&init_net, &rt, &fl);
- if (ret)
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.daddr = dst_ip;
+ fl4.saddr = src_ip;
+ fl4.flowi4_oif = addr->bound_dev_if;
+ rt = ip_route_output_key(&init_net, &fl4);
+ if (IS_ERR(rt)) {
+ ret = PTR_ERR(rt);
goto out;
-
- /* If the device does ARP internally, return 'done' */
- if (rt->idev->dev->flags & IFF_NOARP) {
- rdma_copy_addr(addr, rt->idev->dev, NULL);
- goto put;
}
+ src_in->sin_family = AF_INET;
+ src_in->sin_addr.s_addr = fl4.saddr;
- neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
- if (!neigh) {
- ret = -ENODATA;
+ if (rt->dst.dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
- if (!(neigh->nud_state & NUD_VALID)) {
- ret = -ENODATA;
- goto release;
- }
-
- if (!src_ip) {
- src_in->sin_family = dst_in->sin_family;
- src_in->sin_addr.s_addr = rt->rt_src;
+ /* If the device does ARP internally, return 'done' */
+ if (rt->dst.dev->flags & IFF_NOARP) {
+ ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
+ goto put;
}
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
-release:
- neigh_release(neigh);
+ ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
put:
ip_rt_put(rt);
out:
return ret;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+#if IS_ENABLED(CONFIG_IPV6)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
- struct flowi fl;
- struct neighbour *neigh;
+ struct flowi6 fl6;
struct dst_entry *dst;
- int ret = -ENODATA;
+ int ret;
- memset(&fl, 0, sizeof fl);
- fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
- fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+ memset(&fl6, 0, sizeof fl6);
+ fl6.daddr = dst_in->sin6_addr;
+ fl6.saddr = src_in->sin6_addr;
+ fl6.flowi6_oif = addr->bound_dev_if;
- dst = ip6_route_output(&init_net, NULL, &fl);
- if (!dst)
- return ret;
+ dst = ip6_route_output(&init_net, NULL, &fl6);
+ if ((ret = dst->error))
+ goto put;
+
+ if (ipv6_addr_any(&fl6.saddr)) {
+ ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+ &fl6.daddr, 0, &fl6.saddr);
+ if (ret)
+ goto put;
+
+ src_in->sin6_family = AF_INET6;
+ src_in->sin6_addr = fl6.saddr;
+ }
+
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+ goto put;
+ }
+ /* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
- } else {
- neigh = dst->neighbour;
- if (neigh && (neigh->nud_state & NUD_VALID))
- ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+ goto put;
}
+ ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+put:
dst_release(dst);
return ret;
}
#else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+ struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr)
{
return -EADDRNOTAVAIL;
}
#endif
-static int addr_resolve_remote(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+ struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
{
if (src_in->sa_family == AF_INET) {
- return addr4_resolve_remote((struct sockaddr_in *) src_in,
+ return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
- return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+ return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
}
@@ -327,8 +344,7 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve_remote(src_in, dst_in,
- req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -352,82 +368,6 @@ static void process_req(struct work_struct *work)
}
}
-static int addr_resolve_local(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
-{
- struct net_device *dev;
- int ret;
-
- switch (dst_in->sa_family) {
- case AF_INET:
- {
- __be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
- __be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
- dev = ip_dev_find(&init_net, dst_ip);
- if (!dev)
- return -EADDRNOTAVAIL;
-
- if (ipv4_is_zeronet(src_ip)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv4_is_loopback(src_ip)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- dev_put(dev);
- break;
- }
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6:
- {
- struct in6_addr *a;
-
- for_each_netdev(&init_net, dev)
- if (ipv6_chk_addr(&init_net,
- &((struct sockaddr_in6 *) addr)->sin6_addr,
- dev, 1))
- break;
-
- if (!dev)
- return -EADDRNOTAVAIL;
-
- a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
- if (ipv6_addr_any(a)) {
- src_in->sa_family = dst_in->sa_family;
- ((struct sockaddr_in6 *) src_in)->sin6_addr =
- ((struct sockaddr_in6 *) dst_in)->sin6_addr;
- ret = rdma_copy_addr(addr, dev, dev->dev_addr);
- } else if (ipv6_addr_loopback(a)) {
- ret = rdma_translate_ip(dst_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- } else {
- ret = rdma_translate_ip(src_in, addr);
- if (!ret)
- memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
- }
- break;
- }
-#endif
-
- default:
- ret = -EADDRNOTAVAIL;
- break;
- }
-
- return ret;
-}
-
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +383,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
if (!req)
return -ENOMEM;
- if (src_addr)
- memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
- memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+ src_in = (struct sockaddr *) &req->src_addr;
+ dst_in = (struct sockaddr *) &req->dst_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr));
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
-
- req->status = addr_resolve_local(src_in, dst_in, addr);
- if (req->status == -EADDRNOTAVAIL)
- req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+ req->status = addr_resolve(src_in, dst_in, addr);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -467,15 +413,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
case -ENODATA:
req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
queue_req(req);
- addr_send_arp(dst_in);
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
- kfree(req);
- break;
+ goto err;
}
return ret;
+err:
+ kfree(req);
+ return ret;
}
EXPORT_SYMBOL(rdma_resolve_ip);
@@ -497,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
}
EXPORT_SYMBOL(rdma_addr_cancel);
+struct resolve_cb_context {
+ struct rdma_dev_addr *addr;
+ struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+ struct rdma_dev_addr *addr, void *context)
+{
+ memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
+ rdma_dev_addr));
+ complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+ u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ struct resolve_cb_context ctx;
+ struct net_device *dev;
+
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+
+ ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+ if (ret)
+ return ret;
+
+ ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+ if (ret)
+ return ret;
+
+ memset(&dev_addr, 0, sizeof(dev_addr));
+
+ ctx.addr = &dev_addr;
+ init_completion(&ctx.comp);
+ ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+ &dev_addr, 1000, resolve_cb, &ctx);
+ if (ret)
+ return ret;
+
+ wait_for_completion(&ctx.comp);
+
+ memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+ dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+ if (!dev)
+ return -ENODEV;
+ if (vlan_id)
+ *vlan_id = rdma_vlan_dev_vlan_id(dev);
+ dev_put(dev);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+ int ret = 0;
+ struct rdma_dev_addr dev_addr;
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } gid_addr;
+
+ ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+ if (ret)
+ return ret;
+ memset(&dev_addr, 0, sizeof(dev_addr));
+ ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+ if (ret)
+ return ret;
+
+ memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+ return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
@@ -521,11 +550,13 @@ static int __init addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
+ rdma_addr_register_client(&self);
return 0;
}
static void __exit addr_cleanup(void)
{
+ rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
}