From a7ca1f00ed2921b804d7ebda0f6fca8c9078fa42 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 16 Nov 2009 09:30:33 -0800 Subject: RDMA/ucma: Add option to manually set IB path Export rdma_set_ib_paths to user space to allow applications to manually set the IB path used for connections. This allows alternative ways for a user space application or library to obtain path record information, including retrieving path information from cached data, avoiding direct interaction with the IB SA. The IB SA is a single, centralized entity that can limit scaling on large clusters running MPI applications. Future changes to the rdma cm can expand on this framework to support the full range of features allowed by the IB CM, such as separate forward and reverse paths and APM. Signed-off-by: Sean Hefty Reviewed-By: Jason Gunthorpe Signed-off-by: Roland Dreier --- include/rdma/ib_sa.h | 6 ++++++ include/rdma/ib_user_sa.h | 16 ++++++++++++++++ include/rdma/rdma_user_cm.h | 6 ++++-- 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3841c1aff69..1082afaed15 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -379,4 +379,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num, struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr); +/** + * ib_sa_unpack_path - Convert a path record from MAD format to struct + * ib_sa_path_rec. + */ +void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec); + #endif /* IB_SA_H */ diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h index 659120157e1..cfc7c9ba781 100644 --- a/include/rdma/ib_user_sa.h +++ b/include/rdma/ib_user_sa.h @@ -35,6 +35,22 @@ #include +enum { + IB_PATH_GMP = 1, + IB_PATH_PRIMARY = (1<<1), + IB_PATH_ALTERNATE = (1<<2), + IB_PATH_OUTBOUND = (1<<3), + IB_PATH_INBOUND = (1<<4), + IB_PATH_INBOUND_REVERSE = (1<<5), + IB_PATH_BIDIRECTIONAL = IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE +}; + +struct ib_path_rec_data { + __u32 flags; + __u32 reserved; + __u32 path_rec[16]; +}; + struct ib_user_path_rec { __u8 dgid[16]; __u8 sgid[16]; diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index c55705460b8..1d165022c02 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -215,12 +215,14 @@ struct rdma_ucm_event_resp { /* Option levels */ enum { - RDMA_OPTION_ID = 0 + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 }; /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0 + RDMA_OPTION_ID_TOS = 0, + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- cgit v1.2.3-18-g5258 From 6266ed6e4164466177238b11ecb825a3a108a3e4 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:55:22 -0800 Subject: RDMA/cma: Replace net_device pointer with index Provide the device interface when resolving route information to ensure that the correct outbound device is used. This will also simplify processing of sin6_scope_id for IPv6 support. Based on work from: David Wilder Jason Gunthorpe Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 483057b2f4b..27f17cc2c91 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -61,7 +61,7 @@ struct rdma_dev_addr { unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; enum rdma_node_type dev_type; - struct net_device *src_dev; + int bound_dev_if; }; /** -- cgit v1.2.3-18-g5258 From c4315d85f9b76834289fd503796c01b8311c4b84 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 12:57:18 -0800 Subject: IB/addr: Store net_device type instead of translating to RDMA transport The struct rdma_dev_addr stores net_device address information: the source device address, destination hardware address, and broadcast address. For consistency, store the net_device type rather than converting it to the rdma_node_type. The type indicates the format of the various hardware addresses, which is what we're concerned with, and not the RDMA node type that the address may map to. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 27f17cc2c91..3a39c55d2b9 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -60,7 +61,7 @@ struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; unsigned char broadcast[MAX_ADDR_LEN]; - enum rdma_node_type dev_type; + unsigned short dev_type; int bound_dev_if; }; -- cgit v1.2.3-18-g5258 From 6f8372b69c3198e06cecb1df2cb9682d0c55e657 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Thu, 19 Nov 2009 13:26:06 -0800 Subject: RDMA/cm: fix loopback address support The RDMA CM is intended to support the use of a loopback address when establishing a connection; however, the behavior of the CM when loopback addresses are used is confusing and does not always work, depending on whether loopback was specified by the server, the client, or both. The defined behavior of rdma_bind_addr is to associate an RDMA device with an rdma_cm_id, as long as the user specified a non- zero address. (ie they weren't just trying to reserve a port) Currently, if the loopback address is passed to rdam_bind_addr, no device is associated with the rdma_cm_id. Fix this. If a loopback address is specified by the client as the destination address for a connection, it will fail to establish a connection. This is true even if the server is listing across all addresses or on the loopback address itself. The issue is that the server tries to translate the IP address carried in the REQ message to a local net_device address, which fails. The translation is not needed in this case, since the REQ carries the actual HW address that should be used. Finally, cleanup loopback support to be more transport neutral. Replace separate calls to get/set the sgid and dgid from the device address to a single call that behaves correctly depending on the format of the device address. And support both IPv4 and IPv6 address formats. Signed-off-by: Sean Hefty [ Fixed RDS build by s/ib_addr_get/rdma_addr_get/ - Roland ] Signed-off-by: Roland Dreier --- include/rdma/ib_addr.h | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'include/rdma') diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 3a39c55d2b9..fa0d52b8e62 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -122,40 +122,29 @@ static inline void ib_addr_get_mgid(struct rdma_dev_addr *dev_addr, memcpy(gid, dev_addr->broadcast + 4, sizeof *gid); } -static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr) { - memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid); + return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0; } -static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid); + memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } -static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid); + memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid); } -static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) -{ - memcpy(gid, dev_addr->src_dev_addr, sizeof *gid); -} - -static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr, - union ib_gid *gid) +static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid) { - memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid); + memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid); } #endif /* IB_ADDR_H */ -- cgit v1.2.3-18-g5258 From 55464d461bdcffc4422aebfb750eacf99e3c0f27 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 9 Dec 2009 14:20:04 -0800 Subject: IB: Clarify the documentation of ib_post_send() Clarify the behavior of ib_post_send() when a list of work requests is passed in and an immediate error is returned. Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/rdma') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c179318edd9..09509edb1c5 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1425,6 +1425,11 @@ int ib_destroy_qp(struct ib_qp *qp); * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. + * + * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate + * error is returned, the QP state shall not be affected, + * ib_post_send() will return an immediate error after queueing any + * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, -- cgit v1.2.3-18-g5258