diff options
Diffstat (limited to 'drivers/infiniband/core/addr.c')
| -rw-r--r-- | drivers/infiniband/core/addr.c | 442 |
1 files changed, 309 insertions, 133 deletions
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 781ea595037..8172d37f9ad 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -4,39 +4,48 @@ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * - * This Software is licensed under one of the following licenses: + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: * - * 1) under the terms of the "Common Public License 1.0" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/cpl.php. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: * - * 2) under the terms of the "The BSD License" a copy of which is - * available from the Open Source Initiative, see - * http://www.opensource.org/licenses/bsd-license.php. + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. * - * 3) under the terms of the "GNU General Public License (GPL) Version 2" a - * copy of which is available from the Open Source Initiative, see - * http://www.opensource.org/licenses/gpl-license.php. + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. * - * Licensee has the right to choose one of the above licenses. - * - * Redistributions of source code must retain the above copyright - * notice and one of the license notices. - * - * Redistributions in binary form must reproduce both the above copyright - * notice, one of the license notices in the documentation - * and/or other materials provided with the distribution. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. */ #include <linux/mutex.h> #include <linux/inetdevice.h> +#include <linux/slab.h> #include <linux/workqueue.h> -#include <linux/if_arp.h> +#include <linux/module.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/netevent.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> #include <rdma/ib_addr.h> +#include <rdma/ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("IB Address Translation"); @@ -44,8 +53,8 @@ MODULE_LICENSE("Dual BSD/GPL"); struct addr_req { struct list_head list; - struct sockaddr src_addr; - struct sockaddr dst_addr; + struct sockaddr_storage src_addr; + struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; struct rdma_addr_client *client; void *context; @@ -62,6 +71,23 @@ static LIST_HEAD(req_list); static DECLARE_DELAYED_WORK(work, process_req); static struct workqueue_struct *addr_wq; +int rdma_addr_size(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_IB: + return sizeof(struct sockaddr_ib); + default: + return 0; + } +} +EXPORT_SYMBOL(rdma_addr_size); + +static struct rdma_addr_client self; + void rdma_addr_register_client(struct rdma_addr_client *client) { atomic_set(&client->refcount, 1); @@ -85,37 +111,62 @@ EXPORT_SYMBOL(rdma_addr_unregister_client); int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { - switch (dev->type) { - case ARPHRD_INFINIBAND: - dev_addr->dev_type = RDMA_NODE_IB_CA; - break; - case ARPHRD_ETHER: - dev_addr->dev_type = RDMA_NODE_RNIC; - break; - default: - return -EADDRNOTAVAIL; - } - + dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); if (dst_dev_addr) memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN); + dev_addr->bound_dev_if = dev->ifindex; return 0; } EXPORT_SYMBOL(rdma_copy_addr); -int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr) +int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr, + u16 *vlan_id) { struct net_device *dev; - __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; - int ret; + int ret = -EADDRNOTAVAIL; + + if (dev_addr->bound_dev_if) { + dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if); + if (!dev) + return -ENODEV; + ret = rdma_copy_addr(dev_addr, dev, NULL); + dev_put(dev); + return ret; + } - dev = ip_dev_find(&init_net, ip); - if (!dev) - return -EADDRNOTAVAIL; + switch (addr->sa_family) { + case AF_INET: + dev = ip_dev_find(&init_net, + ((struct sockaddr_in *) addr)->sin_addr.s_addr); - ret = rdma_copy_addr(dev_addr, dev, NULL); - dev_put(dev); + if (!dev) + return ret; + + ret = rdma_copy_addr(dev_addr, dev, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + break; + +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + rcu_read_lock(); + for_each_netdev_rcu(&init_net, dev) { + if (ipv6_chk_addr(&init_net, + &((struct sockaddr_in6 *) addr)->sin6_addr, + dev, 1)) { + ret = rdma_copy_addr(dev_addr, dev, NULL); + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); + break; + } + } + rcu_read_unlock(); + break; +#endif + } return ret; } EXPORT_SYMBOL(rdma_translate_ip); @@ -124,13 +175,11 @@ static void set_timeout(unsigned long time) { unsigned long delay; - cancel_delayed_work(&work); - delay = time - jiffies; if ((long)delay <= 0) delay = 1; - queue_delayed_work(addr_wq, &work, delay); + mod_delayed_work(addr_wq, &work, delay); } static void queue_req(struct addr_req *req) @@ -150,74 +199,142 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static void addr_send_arp(struct sockaddr_in *dst_in) +static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr) { - struct rtable *rt; - struct flowi fl; - __be32 dst_ip = dst_in->sin_addr.s_addr; + struct neighbour *n; + int ret; - memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = dst_ip; - if (ip_route_output_key(&init_net, &rt, &fl)) - return; + n = dst_neigh_lookup(dst, daddr); - neigh_event_send(rt->u.dst.neighbour, NULL); - ip_rt_put(rt); + rcu_read_lock(); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); + ret = -ENODATA; + } else { + ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); + } + rcu_read_unlock(); + + if (n) + neigh_release(n); + + return ret; } -static int addr_resolve_remote(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, - struct rdma_dev_addr *addr) +static int addr4_resolve(struct sockaddr_in *src_in, + struct sockaddr_in *dst_in, + struct rdma_dev_addr *addr) { __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; - struct flowi fl; struct rtable *rt; - struct neighbour *neigh; + struct flowi4 fl4; int ret; - memset(&fl, 0, sizeof fl); - fl.nl_u.ip4_u.daddr = dst_ip; - fl.nl_u.ip4_u.saddr = src_ip; - ret = ip_route_output_key(&init_net, &rt, &fl); - if (ret) + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = dst_ip; + fl4.saddr = src_ip; + fl4.flowi4_oif = addr->bound_dev_if; + rt = ip_route_output_key(&init_net, &fl4); + if (IS_ERR(rt)) { + ret = PTR_ERR(rt); goto out; + } + src_in->sin_family = AF_INET; + src_in->sin_addr.s_addr = fl4.saddr; + + if (rt->dst.dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; + } /* If the device does ARP internally, return 'done' */ - if (rt->idev->dev->flags & IFF_NOARP) { - rdma_copy_addr(addr, rt->idev->dev, NULL); + if (rt->dst.dev->flags & IFF_NOARP) { + ret = rdma_copy_addr(addr, rt->dst.dev, NULL); goto put; } - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev); - if (!neigh) { - ret = -ENODATA; + ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); +put: + ip_rt_put(rt); +out: + return ret; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) +{ + struct flowi6 fl6; + struct dst_entry *dst; + int ret; + + memset(&fl6, 0, sizeof fl6); + fl6.daddr = dst_in->sin6_addr; + fl6.saddr = src_in->sin6_addr; + fl6.flowi6_oif = addr->bound_dev_if; + + dst = ip6_route_output(&init_net, NULL, &fl6); + if ((ret = dst->error)) goto put; + + if (ipv6_addr_any(&fl6.saddr)) { + ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev, + &fl6.daddr, 0, &fl6.saddr); + if (ret) + goto put; + + src_in->sin6_family = AF_INET6; + src_in->sin6_addr = fl6.saddr; } - if (!(neigh->nud_state & NUD_VALID)) { - ret = -ENODATA; - goto release; + if (dst->dev->flags & IFF_LOOPBACK) { + ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL); + if (!ret) + memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); + goto put; } - if (!src_ip) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = rt->rt_src; + /* If the device does ARP internally, return 'done' */ + if (dst->dev->flags & IFF_NOARP) { + ret = rdma_copy_addr(addr, dst->dev, NULL); + goto put; } - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); -release: - neigh_release(neigh); + ret = dst_fetch_ha(dst, addr, &fl6.daddr); put: - ip_rt_put(rt); -out: + dst_release(dst); return ret; } +#else +static int addr6_resolve(struct sockaddr_in6 *src_in, + struct sockaddr_in6 *dst_in, + struct rdma_dev_addr *addr) +{ + return -EADDRNOTAVAIL; +} +#endif + +static int addr_resolve(struct sockaddr *src_in, + struct sockaddr *dst_in, + struct rdma_dev_addr *addr) +{ + if (src_in->sa_family == AF_INET) { + return addr4_resolve((struct sockaddr_in *) src_in, + (struct sockaddr_in *) dst_in, addr); + } else + return addr6_resolve((struct sockaddr_in6 *) src_in, + (struct sockaddr_in6 *) dst_in, addr); +} static void process_req(struct work_struct *work) { struct addr_req *req, *temp_req; - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct list_head done_list; INIT_LIST_HEAD(&done_list); @@ -225,10 +342,9 @@ static void process_req(struct work_struct *work) mutex_lock(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->status == -ENODATA) { - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; - req->status = addr_resolve_remote(src_in, dst_in, - req->addr); + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; + req->status = addr_resolve(src_in, dst_in, req->addr); if (req->status && time_after_eq(jiffies, req->timeout)) req->status = -ETIMEDOUT; else if (req->status == -ENODATA) @@ -245,44 +361,13 @@ static void process_req(struct work_struct *work) list_for_each_entry_safe(req, temp_req, &done_list, list) { list_del(&req->list); - req->callback(req->status, &req->src_addr, req->addr, - req->context); + req->callback(req->status, (struct sockaddr *) &req->src_addr, + req->addr, req->context); put_client(req->client); kfree(req); } } -static int addr_resolve_local(struct sockaddr_in *src_in, - struct sockaddr_in *dst_in, - struct rdma_dev_addr *addr) -{ - struct net_device *dev; - __be32 src_ip = src_in->sin_addr.s_addr; - __be32 dst_ip = dst_in->sin_addr.s_addr; - int ret; - - dev = ip_dev_find(&init_net, dst_ip); - if (!dev) - return -EADDRNOTAVAIL; - - if (ipv4_is_zeronet(src_ip)) { - src_in->sin_family = dst_in->sin_family; - src_in->sin_addr.s_addr = dst_ip; - ret = rdma_copy_addr(addr, dev, dev->dev_addr); - } else if (ipv4_is_loopback(src_ip)) { - ret = rdma_translate_ip((struct sockaddr *)dst_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } else { - ret = rdma_translate_ip((struct sockaddr *)src_in, addr); - if (!ret) - memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN); - } - - dev_put(dev); - return ret; -} - int rdma_resolve_ip(struct rdma_addr_client *client, struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, @@ -290,7 +375,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client, struct rdma_dev_addr *addr, void *context), void *context) { - struct sockaddr_in *src_in, *dst_in; + struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; @@ -298,22 +383,28 @@ int rdma_resolve_ip(struct rdma_addr_client *client, if (!req) return -ENOMEM; - if (src_addr) - memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr)); - memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr)); + src_in = (struct sockaddr *) &req->src_addr; + dst_in = (struct sockaddr *) &req->dst_addr; + + if (src_addr) { + if (src_addr->sa_family != dst_addr->sa_family) { + ret = -EINVAL; + goto err; + } + + memcpy(src_in, src_addr, rdma_addr_size(src_addr)); + } else { + src_in->sa_family = dst_addr->sa_family; + } + + memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->client = client; atomic_inc(&client->refcount); - src_in = (struct sockaddr_in *) &req->src_addr; - dst_in = (struct sockaddr_in *) &req->dst_addr; - - req->status = addr_resolve_local(src_in, dst_in, addr); - if (req->status == -EADDRNOTAVAIL) - req->status = addr_resolve_remote(src_in, dst_in, addr); - + req->status = addr_resolve(src_in, dst_in, addr); switch (req->status) { case 0: req->timeout = jiffies; @@ -322,15 +413,16 @@ int rdma_resolve_ip(struct rdma_addr_client *client, case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); - addr_send_arp(dst_in); break; default: ret = req->status; atomic_dec(&client->refcount); - kfree(req); - break; + goto err; } return ret; +err: + kfree(req); + return ret; } EXPORT_SYMBOL(rdma_resolve_ip); @@ -352,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr) } EXPORT_SYMBOL(rdma_addr_cancel); +struct resolve_cb_context { + struct rdma_dev_addr *addr; + struct completion comp; +}; + +static void resolve_cb(int status, struct sockaddr *src_addr, + struct rdma_dev_addr *addr, void *context) +{ + memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct + rdma_dev_addr)); + complete(&((struct resolve_cb_context *)context)->comp); +} + +int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac, + u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + struct resolve_cb_context ctx; + struct net_device *dev; + + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } sgid_addr, dgid_addr; + + + ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid); + if (ret) + return ret; + + ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid); + if (ret) + return ret; + + memset(&dev_addr, 0, sizeof(dev_addr)); + + ctx.addr = &dev_addr; + init_completion(&ctx.comp); + ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr, + &dev_addr, 1000, resolve_cb, &ctx); + if (ret) + return ret; + + wait_for_completion(&ctx.comp); + + memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); + dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if); + if (!dev) + return -ENODEV; + if (vlan_id) + *vlan_id = rdma_vlan_dev_vlan_id(dev); + dev_put(dev); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh); + +int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id) +{ + int ret = 0; + struct rdma_dev_addr dev_addr; + union { + struct sockaddr _sockaddr; + struct sockaddr_in _sockaddr_in; + struct sockaddr_in6 _sockaddr_in6; + } gid_addr; + + ret = rdma_gid2ip(&gid_addr._sockaddr, sgid); + + if (ret) + return ret; + memset(&dev_addr, 0, sizeof(dev_addr)); + ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id); + if (ret) + return ret; + + memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN); + return ret; +} +EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid); + static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { @@ -369,18 +543,20 @@ static struct notifier_block nb = { .notifier_call = netevent_callback }; -static int addr_init(void) +static int __init addr_init(void) { addr_wq = create_singlethread_workqueue("ib_addr"); if (!addr_wq) return -ENOMEM; register_netevent_notifier(&nb); + rdma_addr_register_client(&self); return 0; } -static void addr_cleanup(void) +static void __exit addr_cleanup(void) { + rdma_addr_unregister_client(&self); unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); } |
