diff options
Diffstat (limited to 'include/rdma/ib_verbs.h')
| -rw-r--r-- | include/rdma/ib_verbs.h | 880 |
1 files changed, 830 insertions, 50 deletions
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 701e7b40560..7ccef342f72 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -34,8 +34,6 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. - * - * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ */ #if !defined(IB_VERBS_H) @@ -49,10 +47,14 @@ #include <linux/list.h> #include <linux/rwsem.h> #include <linux/scatterlist.h> +#include <linux/workqueue.h> +#include <uapi/linux/if_ether.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/uaccess.h> +extern struct workqueue_struct *ib_wq; + union ib_gid { u8 raw[16]; struct { @@ -66,16 +68,26 @@ enum rdma_node_type { RDMA_NODE_IB_CA = 1, RDMA_NODE_IB_SWITCH, RDMA_NODE_IB_ROUTER, - RDMA_NODE_RNIC + RDMA_NODE_RNIC, + RDMA_NODE_USNIC, + RDMA_NODE_USNIC_UDP, }; enum rdma_transport_type { RDMA_TRANSPORT_IB, - RDMA_TRANSPORT_IWARP + RDMA_TRANSPORT_IWARP, + RDMA_TRANSPORT_USNIC, + RDMA_TRANSPORT_USNIC_UDP }; -enum rdma_transport_type -rdma_node_get_transport(enum rdma_node_type node_type) __attribute_const__; +__attribute_const__ enum rdma_transport_type +rdma_node_get_transport(enum rdma_node_type node_type); + +enum rdma_link_layer { + IB_LINK_LAYER_UNSPECIFIED, + IB_LINK_LAYER_INFINIBAND, + IB_LINK_LAYER_ETHERNET, +}; enum ib_device_cap_flags { IB_DEVICE_RESIZE_MAX_WR = 1, @@ -93,8 +105,8 @@ enum ib_device_cap_flags { IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), IB_DEVICE_SRQ_RESIZE = (1<<13), IB_DEVICE_N_NOTIFY_CQ = (1<<14), - IB_DEVICE_ZERO_STAG = (1<<15), - IB_DEVICE_SEND_W_INV = (1<<16), + IB_DEVICE_LOCAL_DMA_LKEY = (1<<15), + IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */ IB_DEVICE_MEM_WINDOW = (1<<17), /* * Devices should set IB_DEVICE_UD_IP_SUM if they support @@ -104,6 +116,25 @@ enum ib_device_cap_flags { * IPoIB driver may set NETIF_F_IP_CSUM for datagram mode. */ IB_DEVICE_UD_IP_CSUM = (1<<18), + IB_DEVICE_UD_TSO = (1<<19), + IB_DEVICE_XRC = (1<<20), + IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21), + IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22), + IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23), + IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24), + IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29), + IB_DEVICE_SIGNATURE_HANDOVER = (1<<30) +}; + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, }; enum ib_atomic_cap { @@ -135,6 +166,7 @@ struct ib_device_attr { int max_qp_init_rd_atom; int max_ee_init_rd_atom; enum ib_atomic_cap atomic_cap; + enum ib_atomic_cap masked_atomic_cap; int max_ee; int max_rdd; int max_mw; @@ -149,8 +181,11 @@ struct ib_device_attr { int max_srq; int max_srq_wr; int max_srq_sge; + unsigned int max_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; + int sig_prot_cap; + int sig_guard_cap; }; enum ib_mtu { @@ -195,6 +230,7 @@ enum ib_port_cap_flags { IB_PORT_SM_DISABLED = 1 << 10, IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14, IB_PORT_CM_SUP = 1 << 16, IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, IB_PORT_REINIT_SUP = 1 << 18, @@ -204,7 +240,8 @@ enum ib_port_cap_flags { IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, IB_PORT_BOOT_MGMT_SUP = 1 << 23, IB_PORT_LINK_LATENCY_SUP = 1 << 24, - IB_PORT_CLIENT_REG_SUP = 1 << 25 + IB_PORT_CLIENT_REG_SUP = 1 << 25, + IB_PORT_IP_BASED_GIDS = 1 << 26 }; enum ib_port_width { @@ -225,6 +262,66 @@ static inline int ib_width_enum_to_int(enum ib_port_width width) } } +enum ib_port_speed { + IB_SPEED_SDR = 1, + IB_SPEED_DDR = 2, + IB_SPEED_QDR = 4, + IB_SPEED_FDR10 = 8, + IB_SPEED_FDR = 16, + IB_SPEED_EDR = 32 +}; + +struct ib_protocol_stats { + /* TBD... */ +}; + +struct iw_protocol_stats { + u64 ipInReceives; + u64 ipInHdrErrors; + u64 ipInTooBigErrors; + u64 ipInNoRoutes; + u64 ipInAddrErrors; + u64 ipInUnknownProtos; + u64 ipInTruncatedPkts; + u64 ipInDiscards; + u64 ipInDelivers; + u64 ipOutForwDatagrams; + u64 ipOutRequests; + u64 ipOutDiscards; + u64 ipOutNoRoutes; + u64 ipReasmTimeout; + u64 ipReasmReqds; + u64 ipReasmOKs; + u64 ipReasmFails; + u64 ipFragOKs; + u64 ipFragFails; + u64 ipFragCreates; + u64 ipInMcastPkts; + u64 ipOutMcastPkts; + u64 ipInBcastPkts; + u64 ipOutBcastPkts; + + u64 tcpRtoAlgorithm; + u64 tcpRtoMin; + u64 tcpRtoMax; + u64 tcpMaxConn; + u64 tcpActiveOpens; + u64 tcpPassiveOpens; + u64 tcpAttemptFails; + u64 tcpEstabResets; + u64 tcpCurrEstab; + u64 tcpInSegs; + u64 tcpOutSegs; + u64 tcpRetransSegs; + u64 tcpInErrs; + u64 tcpOutRsts; +}; + +union rdma_protocol_stats { + struct ib_protocol_stats ib; + struct iw_protocol_stats iw; +}; + struct ib_port_attr { enum ib_port_state state; enum ib_mtu max_mtu; @@ -287,7 +384,8 @@ enum ib_event_type { IB_EVENT_SRQ_ERR, IB_EVENT_SRQ_LIMIT_REACHED, IB_EVENT_QP_LAST_WQE_REACHED, - IB_EVENT_CLIENT_REREGISTER + IB_EVENT_CLIENT_REREGISTER, + IB_EVENT_GID_CHANGE, }; struct ib_event { @@ -335,7 +433,7 @@ enum { IB_MULTICAST_QPN = 0xffffff }; -#define IB_LID_PERMISSIVE __constant_htons(0xFFFF) +#define IB_LID_PERMISSIVE cpu_to_be16(0xFFFF) enum ib_ah_flags { IB_AH_GRH = 1 @@ -351,7 +449,15 @@ enum ib_rate { IB_RATE_40_GBPS = 7, IB_RATE_60_GBPS = 8, IB_RATE_80_GBPS = 9, - IB_RATE_120_GBPS = 10 + IB_RATE_120_GBPS = 10, + IB_RATE_14_GBPS = 11, + IB_RATE_56_GBPS = 12, + IB_RATE_112_GBPS = 13, + IB_RATE_168_GBPS = 14, + IB_RATE_25_GBPS = 15, + IB_RATE_100_GBPS = 16, + IB_RATE_200_GBPS = 17, + IB_RATE_300_GBPS = 18 }; /** @@ -360,14 +466,145 @@ enum ib_rate { * converted to 2, since 5 Gbit/sec is 2 * 2.5 Gbit/sec. * @rate: rate to convert. */ -int ib_rate_to_mult(enum ib_rate rate) __attribute_const__; +__attribute_const__ int ib_rate_to_mult(enum ib_rate rate); + +/** + * ib_rate_to_mbps - Convert the IB rate enum to Mbps. + * For example, IB_RATE_2_5_GBPS will be converted to 2500. + * @rate: rate to convert. + */ +__attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); + +enum ib_mr_create_flags { + IB_MR_SIGNATURE_EN = 1, +}; + +/** + * ib_mr_init_attr - Memory region init attributes passed to routine + * ib_create_mr. + * @max_reg_descriptors: max number of registration descriptors that + * may be used with registration work requests. + * @flags: MR creation flags bit mask. + */ +struct ib_mr_init_attr { + int max_reg_descriptors; + u32 flags; +}; + +enum ib_signature_type { + IB_SIG_TYPE_T10_DIF, +}; + +/** + * T10-DIF Signature types + * T10-DIF types are defined by SCSI + * specifications. + */ +enum ib_t10_dif_type { + IB_T10DIF_NONE, + IB_T10DIF_TYPE1, + IB_T10DIF_TYPE2, + IB_T10DIF_TYPE3 +}; + +/** + * Signature T10-DIF block-guard types + * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @type: T10-DIF type (0|1|2|3) + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @type3_inc_reftag: T10-DIF type 3 does not state + * about the reference tag, it is the user + * choice to increment it or not. + */ +struct ib_t10_dif_domain { + enum ib_t10_dif_type type; + enum ib_t10_dif_bg_type bg_type; + u16 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool type3_inc_reftag; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout desciptor. + * @wire: wire domain layout desciptor. + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/** + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +enum ib_mr_status_check { + IB_MR_CHECK_SIG_STATUS = 1, +}; + +/** + * struct ib_mr_status - Memory region status container + * + * @fail_status: Bitmask of MR checks status. For each + * failed check a corresponding status bit is set. + * @sig_err: Additional info for IB_MR_CEHCK_SIG_STATUS + * failure. + */ +struct ib_mr_status { + u32 fail_status; + struct ib_sig_err sig_err; +}; /** * mult_to_ib_rate - Convert a multiple of 2.5 Gbit/sec to an IB rate * enum. * @mult: multiple to convert. */ -enum ib_rate mult_to_ib_rate(int mult) __attribute_const__; +__attribute_const__ enum ib_rate mult_to_ib_rate(int mult); struct ib_ah_attr { struct ib_global_route grh; @@ -377,6 +614,8 @@ struct ib_ah_attr { u8 static_rate; u8 ah_flags; u8 port_num; + u8 dmac[ETH_ALEN]; + u16 vlan_id; }; enum ib_wc_status { @@ -411,6 +650,11 @@ enum ib_wc_opcode { IB_WC_COMP_SWAP, IB_WC_FETCH_ADD, IB_WC_BIND_MW, + IB_WC_LSO, + IB_WC_LOCAL_INV, + IB_WC_FAST_REG_MR, + IB_WC_MASKED_COMP_SWAP, + IB_WC_MASKED_FETCH_ADD, /* * Set value of IB_WC_RECV so consumers can test if a completion is a * receive by testing (opcode & IB_WC_RECV). @@ -421,7 +665,11 @@ enum ib_wc_opcode { enum ib_wc_flags { IB_WC_GRH = 1, - IB_WC_WITH_IMM = (1<<1) + IB_WC_WITH_IMM = (1<<1), + IB_WC_WITH_INVALIDATE = (1<<2), + IB_WC_IP_CSUM_OK = (1<<3), + IB_WC_WITH_SMAC = (1<<4), + IB_WC_WITH_VLAN = (1<<5), }; struct ib_wc { @@ -431,7 +679,10 @@ struct ib_wc { u32 vendor_err; u32 byte_len; struct ib_qp *qp; - __be32 imm_data; + union { + __be32 imm_data; + u32 invalidate_rkey; + } ex; u32 src_qp; int wc_flags; u16 pkey_index; @@ -439,7 +690,8 @@ struct ib_wc { u8 sl; u8 dlid_path_bits; u8 port_num; /* valid only for DR SMPs on switches */ - int csum_ok; + u8 smac[ETH_ALEN]; + u16 vlan_id; }; enum ib_cq_notify_flags { @@ -449,6 +701,11 @@ enum ib_cq_notify_flags { IB_CQ_REPORT_MISSED_EVENTS = 1 << 2, }; +enum ib_srq_type { + IB_SRQT_BASIC, + IB_SRQT_XRC +}; + enum ib_srq_attr_mask { IB_SRQ_MAX_WR = 1 << 0, IB_SRQ_LIMIT = 1 << 1, @@ -464,6 +721,14 @@ struct ib_srq_init_attr { void (*event_handler)(struct ib_event *, void *); void *srq_context; struct ib_srq_attr attr; + enum ib_srq_type srq_type; + + union { + struct { + struct ib_xrcd *xrcd; + struct ib_cq *cq; + } xrc; + } ext; }; struct ib_qp_cap { @@ -492,21 +757,65 @@ enum ib_qp_type { IB_QPT_UC, IB_QPT_UD, IB_QPT_RAW_IPV6, - IB_QPT_RAW_ETY + IB_QPT_RAW_ETHERTYPE, + IB_QPT_RAW_PACKET = 8, + IB_QPT_XRC_INI = 9, + IB_QPT_XRC_TGT, + IB_QPT_MAX, + /* Reserve a range for qp types internal to the low level driver. + * These qp types will not be visible at the IB core layer, so the + * IB_QPT_MAX usages should not be affected in the core layer + */ + IB_QPT_RESERVED1 = 0x1000, + IB_QPT_RESERVED2, + IB_QPT_RESERVED3, + IB_QPT_RESERVED4, + IB_QPT_RESERVED5, + IB_QPT_RESERVED6, + IB_QPT_RESERVED7, + IB_QPT_RESERVED8, + IB_QPT_RESERVED9, + IB_QPT_RESERVED10, +}; + +enum ib_qp_create_flags { + IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + IB_QP_CREATE_NETIF_QP = 1 << 5, + IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_USE_GFP_NOIO = 1 << 7, + /* reserve bits 26-31 for low level drivers' internal use */ + IB_QP_CREATE_RESERVED_START = 1 << 26, + IB_QP_CREATE_RESERVED_END = 1 << 31, }; + +/* + * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler + * callback to destroy the passed in QP. + */ + struct ib_qp_init_attr { void (*event_handler)(struct ib_event *, void *); void *qp_context; struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_xrcd *xrcd; /* XRC TGT QPs only */ struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; + enum ib_qp_create_flags create_flags; u8 port_num; /* special QP types only */ }; +struct ib_qp_open_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type qp_type; +}; + enum ib_rnr_timeout { IB_RNR_TIMER_655_36 = 0, IB_RNR_TIMER_000_01 = 1, @@ -563,7 +872,11 @@ enum ib_qp_attr_mask { IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), IB_QP_PATH_MIG_STATE = (1<<18), IB_QP_CAP = (1<<19), - IB_QP_DEST_QPN = (1<<20) + IB_QP_DEST_QPN = (1<<20), + IB_QP_SMAC = (1<<21), + IB_QP_ALT_SMAC = (1<<22), + IB_QP_VID = (1<<23), + IB_QP_ALT_VID = (1<<24), }; enum ib_qp_state { @@ -582,6 +895,11 @@ enum ib_mig_state { IB_MIG_ARMED }; +enum ib_mw_type { + IB_MW_TYPE_1 = 1, + IB_MW_TYPE_2 = 2 +}; + struct ib_qp_attr { enum ib_qp_state qp_state; enum ib_qp_state cur_qp_state; @@ -608,6 +926,10 @@ struct ib_qp_attr { u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; + u8 smac[ETH_ALEN]; + u8 alt_smac[ETH_ALEN]; + u16 vlan_id; + u16 alt_vlan_id; }; enum ib_wr_opcode { @@ -617,7 +939,29 @@ enum ib_wr_opcode { IB_WR_SEND_WITH_IMM, IB_WR_RDMA_READ, IB_WR_ATOMIC_CMP_AND_SWP, - IB_WR_ATOMIC_FETCH_AND_ADD + IB_WR_ATOMIC_FETCH_AND_ADD, + IB_WR_LSO, + IB_WR_SEND_WITH_INV, + IB_WR_RDMA_READ_WITH_INV, + IB_WR_LOCAL_INV, + IB_WR_FAST_REG_MR, + IB_WR_MASKED_ATOMIC_CMP_AND_SWP, + IB_WR_MASKED_ATOMIC_FETCH_AND_ADD, + IB_WR_BIND_MW, + IB_WR_REG_SIG_MR, + /* reserve values for low level drivers' internal use. + * These values will not be used at all in the ib core layer. + */ + IB_WR_RESERVED1 = 0xf0, + IB_WR_RESERVED2, + IB_WR_RESERVED3, + IB_WR_RESERVED4, + IB_WR_RESERVED5, + IB_WR_RESERVED6, + IB_WR_RESERVED7, + IB_WR_RESERVED8, + IB_WR_RESERVED9, + IB_WR_RESERVED10, }; enum ib_send_flags { @@ -625,7 +969,11 @@ enum ib_send_flags { IB_SEND_SIGNALED = (1<<1), IB_SEND_SOLICITED = (1<<2), IB_SEND_INLINE = (1<<3), - IB_SEND_IP_CSUM = (1<<4) + IB_SEND_IP_CSUM = (1<<4), + + /* reserve bits 26-31 for low level drivers' internal use */ + IB_SEND_RESERVED_START = (1 << 26), + IB_SEND_RESERVED_END = (1 << 31), }; struct ib_sge { @@ -634,6 +982,29 @@ struct ib_sge { u32 lkey; }; +struct ib_fast_reg_page_list { + struct ib_device *device; + u64 *page_list; + unsigned int max_page_list_len; +}; + +/** + * struct ib_mw_bind_info - Parameters for a memory window bind operation. + * @mr: A memory region to bind the memory window to. + * @addr: The address where the memory window should begin. + * @length: The length of the memory window, in bytes. + * @mw_access_flags: Access flags from enum ib_access_flags for the window. + * + * This struct contains the shared parameters for type 1 and type 2 + * memory window bind operations. + */ +struct ib_mw_bind_info { + struct ib_mr *mr; + u64 addr; + u64 length; + int mw_access_flags; +}; + struct ib_send_wr { struct ib_send_wr *next; u64 wr_id; @@ -641,7 +1012,10 @@ struct ib_send_wr { int num_sge; enum ib_wr_opcode opcode; int send_flags; - __be32 imm_data; + union { + __be32 imm_data; + u32 invalidate_rkey; + } ex; union { struct { u64 remote_addr; @@ -651,16 +1025,43 @@ struct ib_send_wr { u64 remote_addr; u64 compare_add; u64 swap; + u64 compare_add_mask; + u64 swap_mask; u32 rkey; } atomic; struct { struct ib_ah *ah; + void *header; + int hlen; + int mss; u32 remote_qpn; u32 remote_qkey; u16 pkey_index; /* valid for GSI only */ u8 port_num; /* valid for DR SMPs on switch only */ } ud; + struct { + u64 iova_start; + struct ib_fast_reg_page_list *page_list; + unsigned int page_shift; + unsigned int page_list_len; + u32 length; + int access_flags; + u32 rkey; + } fast_reg; + struct { + struct ib_mw *mw; + /* The new rkey for the memory window. */ + u32 rkey; + struct ib_mw_bind_info bind_info; + } bind_mw; + struct { + struct ib_sig_attrs *sig_attrs; + struct ib_mr *sig_mr; + int access_flags; + struct ib_sge *prot; + } sig_handover; } wr; + u32 xrc_remote_srq_num; /* XRC TGT QPs only */ }; struct ib_recv_wr { @@ -675,7 +1076,8 @@ enum ib_access_flags { IB_ACCESS_REMOTE_WRITE = (1<<1), IB_ACCESS_REMOTE_READ = (1<<2), IB_ACCESS_REMOTE_ATOMIC = (1<<3), - IB_ACCESS_MW_BIND = (1<<4) + IB_ACCESS_MW_BIND = (1<<4), + IB_ZERO_BASED = (1<<5) }; struct ib_phys_buf { @@ -698,13 +1100,16 @@ enum ib_mr_rereg_flags { IB_MR_REREG_ACCESS = (1<<2) }; +/** + * struct ib_mw_bind - Parameters for a type 1 memory window bind operation. + * @wr_id: Work request id. + * @send_flags: Flags from ib_send_flags enum. + * @bind_info: More parameters of the bind operation. + */ struct ib_mw_bind { - struct ib_mr *mr; - u64 wr_id; - u64 addr; - u32 length; - int send_flags; - int mw_access_flags; + u64 wr_id; + int send_flags; + struct ib_mw_bind_info bind_info; }; struct ib_fmr_attr { @@ -722,6 +1127,8 @@ struct ib_ucontext { struct list_head qp_list; struct list_head srq_list; struct list_head ah_list; + struct list_head xrcd_list; + struct list_head rule_list; int closing; }; @@ -730,14 +1137,14 @@ struct ib_uobject { struct ib_ucontext *context; /* associated user context */ void *object; /* containing object */ struct list_head list; /* link to context's list */ - u32 id; /* index into kernel idr */ + int id; /* index into kernel idr */ struct kref ref; struct rw_semaphore mutex; /* protects .live */ int live; }; struct ib_udata { - void __user *inbuf; + const void __user *inbuf; void __user *outbuf; size_t inlen; size_t outlen; @@ -749,6 +1156,15 @@ struct ib_pd { atomic_t usecnt; /* count all resources */ }; +struct ib_xrcd { + struct ib_device *device; + atomic_t usecnt; /* count all exposed resources */ + struct inode *inode; + + struct mutex tgt_qp_mutex; + struct list_head tgt_qp_list; +}; + struct ib_ah { struct ib_device *device; struct ib_pd *pd; @@ -762,7 +1178,7 @@ struct ib_cq { struct ib_uobject *uobject; ib_comp_handler comp_handler; void (*event_handler)(struct ib_event *, void *); - void * cq_context; + void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ }; @@ -773,7 +1189,16 @@ struct ib_srq { struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *srq_context; + enum ib_srq_type srq_type; atomic_t usecnt; + + union { + struct { + struct ib_xrcd *xrcd; + struct ib_cq *cq; + u32 srq_num; + } xrc; + } ext; }; struct ib_qp { @@ -782,6 +1207,12 @@ struct ib_qp { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_xrcd *xrcd; /* XRC TGT QPs only */ + struct list_head xrcd_list; + /* count times opened, mcast attaches, flow attaches */ + atomic_t usecnt; + struct list_head open_list; + struct ib_qp *real_qp; struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; @@ -803,6 +1234,7 @@ struct ib_mw { struct ib_pd *pd; struct ib_uobject *uobject; u32 rkey; + enum ib_mw_type type; }; struct ib_fmr { @@ -813,6 +1245,126 @@ struct ib_fmr { u32 rkey; }; +/* Supported steering options */ +enum ib_flow_attr_type { + /* steering according to rule specifications */ + IB_FLOW_ATTR_NORMAL = 0x0, + /* default unicast and multicast rule - + * receive all Eth traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_ALL_DEFAULT = 0x1, + /* default multicast rule - + * receive all Eth multicast traffic which isn't steered to any QP + */ + IB_FLOW_ATTR_MC_DEFAULT = 0x2, + /* sniffer rule - receive all port traffic */ + IB_FLOW_ATTR_SNIFFER = 0x3 +}; + +/* Supported steering header types */ +enum ib_flow_spec_type { + /* L2 headers*/ + IB_FLOW_SPEC_ETH = 0x20, + IB_FLOW_SPEC_IB = 0x22, + /* L3 header*/ + IB_FLOW_SPEC_IPV4 = 0x30, + /* L4 headers*/ + IB_FLOW_SPEC_TCP = 0x40, + IB_FLOW_SPEC_UDP = 0x41 +}; +#define IB_FLOW_SPEC_LAYER_MASK 0xF0 +#define IB_FLOW_SPEC_SUPPORT_LAYERS 4 + +/* Flow steering rule priority is set according to it's domain. + * Lower domain value means higher priority. + */ +enum ib_flow_domain { + IB_FLOW_DOMAIN_USER, + IB_FLOW_DOMAIN_ETHTOOL, + IB_FLOW_DOMAIN_RFS, + IB_FLOW_DOMAIN_NIC, + IB_FLOW_DOMAIN_NUM /* Must be last */ +}; + +struct ib_flow_eth_filter { + u8 dst_mac[6]; + u8 src_mac[6]; + __be16 ether_type; + __be16 vlan_tag; +}; + +struct ib_flow_spec_eth { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_eth_filter val; + struct ib_flow_eth_filter mask; +}; + +struct ib_flow_ib_filter { + __be16 dlid; + __u8 sl; +}; + +struct ib_flow_spec_ib { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ib_filter val; + struct ib_flow_ib_filter mask; +}; + +struct ib_flow_ipv4_filter { + __be32 src_ip; + __be32 dst_ip; +}; + +struct ib_flow_spec_ipv4 { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_ipv4_filter val; + struct ib_flow_ipv4_filter mask; +}; + +struct ib_flow_tcp_udp_filter { + __be16 dst_port; + __be16 src_port; +}; + +struct ib_flow_spec_tcp_udp { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_tcp_udp_filter val; + struct ib_flow_tcp_udp_filter mask; +}; + +union ib_flow_spec { + struct { + enum ib_flow_spec_type type; + u16 size; + }; + struct ib_flow_spec_eth eth; + struct ib_flow_spec_ib ib; + struct ib_flow_spec_ipv4 ipv4; + struct ib_flow_spec_tcp_udp tcp_udp; +}; + +struct ib_flow_attr { + enum ib_flow_attr_type type; + u16 size; + u16 priority; + u32 flags; + u8 num_of_specs; + u8 port; + /* Following are the optional layers according to user request + * struct ib_flow_spec_xxx + * struct ib_flow_spec_yyy + */ +}; + +struct ib_flow { + struct ib_qp *qp; + struct ib_uobject *uobject; +}; + struct ib_mad; struct ib_grh; @@ -861,14 +1413,10 @@ struct ib_dma_mapping_ops { void (*unmap_sg)(struct ib_device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); - u64 (*dma_address)(struct ib_device *dev, - struct scatterlist *sg); - unsigned int (*dma_len)(struct ib_device *dev, - struct scatterlist *sg); void (*sync_single_for_cpu)(struct ib_device *dev, u64 dma_handle, size_t size, - enum dma_data_direction dir); + enum dma_data_direction dir); void (*sync_single_for_device)(struct ib_device *dev, u64 dma_handle, size_t size, @@ -892,9 +1440,9 @@ struct ib_device { struct list_head event_handler_list; spinlock_t event_handler_lock; + spinlock_t client_data_lock; struct list_head core_list; struct list_head client_data_list; - spinlock_t client_data_lock; struct ib_cache cache; int *pkey_tbl_len; @@ -904,11 +1452,15 @@ struct ib_device { struct iw_cm_verbs *iwcm; + int (*get_protocol_stats)(struct ib_device *device, + union rdma_protocol_stats *stats); int (*query_device)(struct ib_device *device, struct ib_device_attr *device_attr); int (*query_port)(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); + enum rdma_link_layer (*get_link_layer)(struct ib_device *device, + u8 port_num); int (*query_gid)(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -971,6 +1523,8 @@ struct ib_device { int comp_vector, struct ib_ucontext *context, struct ib_udata *udata); + int (*modify_cq)(struct ib_cq *cq, u16 cq_count, + u16 cq_period); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); @@ -996,6 +1550,14 @@ struct ib_device { int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); + int (*destroy_mr)(struct ib_mr *mr); + struct ib_mr * (*create_mr)(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); + struct ib_mr * (*alloc_fast_reg_mr)(struct ib_pd *pd, + int max_page_list_len); + struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device, + int page_list_len); + void (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list); int (*rereg_phys_mr)(struct ib_mr *mr, int mr_rereg_mask, struct ib_pd *pd, @@ -1003,7 +1565,8 @@ struct ib_device { int num_phys_buf, int mr_access_flags, u64 *iova_start); - struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd, + enum ib_mw_type type); int (*bind_mw)(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); @@ -1029,11 +1592,22 @@ struct ib_device { struct ib_grh *in_grh, struct ib_mad *in_mad, struct ib_mad *out_mad); + struct ib_xrcd * (*alloc_xrcd)(struct ib_device *device, + struct ib_ucontext *ucontext, + struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd); + struct ib_flow * (*create_flow)(struct ib_qp *qp, + struct ib_flow_attr + *flow_attr, + int domain); + int (*destroy_flow)(struct ib_flow *flow_id); + int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); struct ib_dma_mapping_ops *dma_ops; struct module *owner; - struct class_device class_dev; + struct device dev; struct kobject *ports_parent; struct list_head port_list; @@ -1043,11 +1617,13 @@ struct ib_device { IB_DEV_UNREGISTERED } reg_state; - u64 uverbs_cmd_mask; int uverbs_abi_ver; + u64 uverbs_cmd_mask; + u64 uverbs_ex_cmd_mask; char node_desc[64]; __be64 node_guid; + u32 local_dma_lkey; u8 node_type; u8 phys_port_cnt; }; @@ -1063,7 +1639,9 @@ struct ib_client { struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device); -int ib_register_device (struct ib_device *device); +int ib_register_device(struct ib_device *device, + int (*port_callback)(struct ib_device *, + u8, struct kobject *)); void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); @@ -1091,6 +1669,7 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len * @next_state: Next QP state * @type: QP type * @mask: Mask of supplied QP attributes + * @ll : link layer of port * * This function is a helper function that a low-level driver's * modify_qp method can use to validate the consumer's input. It @@ -1099,7 +1678,8 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len * and that the attribute mask supplied is allowed for the transition. */ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, - enum ib_qp_type type, enum ib_qp_attr_mask mask); + enum ib_qp_type type, enum ib_qp_attr_mask mask, + enum rdma_link_layer ll); int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); @@ -1111,6 +1691,9 @@ int ib_query_device(struct ib_device *device, int ib_query_port(struct ib_device *device, u8 port_num, struct ib_port_attr *port_attr); +enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, + u8 port_num); + int ib_query_gid(struct ib_device *device, u8 port_num, int index, union ib_gid *gid); @@ -1317,12 +1900,36 @@ int ib_query_qp(struct ib_qp *qp, int ib_destroy_qp(struct ib_qp *qp); /** + * ib_open_qp - Obtain a reference to an existing sharable QP. + * @xrcd - XRC domain + * @qp_open_attr: Attributes identifying the QP to open. + * + * Returns a reference to a sharable QP. + */ +struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd, + struct ib_qp_open_attr *qp_open_attr); + +/** + * ib_close_qp - Release an external reference to a QP. + * @qp: The QP handle to release + * + * The opened QP handle is released by the caller. The underlying + * shared QP is not destroyed until all internal references are released. + */ +int ib_close_qp(struct ib_qp *qp); + +/** * ib_post_send - Posts a list of work requests to the send queue of * the specified QP. * @qp: The QP to post the work request on. * @send_wr: A list of work requests to post on the send queue. * @bad_send_wr: On an immediate failure, this parameter will reference * the work request that failed to be posted on the QP. + * + * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate + * error is returned, the QP state shall not be affected, + * ib_post_send() will return an immediate error after queueing any + * earlier work requests in the list. */ static inline int ib_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr, @@ -1376,6 +1983,15 @@ struct ib_cq *ib_create_cq(struct ib_device *device, int ib_resize_cq(struct ib_cq *cq, int cqe); /** + * ib_modify_cq - Modifies moderation params of the CQ + * @cq: The CQ to modify. + * @cq_count: number of CQEs that will trigger an event + * @cq_period: max period of time in usec before triggering an event + * + */ +int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); + +/** * ib_destroy_cq - Destroys the specified CQ. * @cq: The CQ to destroy. */ @@ -1479,7 +2095,7 @@ static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr) { if (dev->dma_ops) return dev->dma_ops->mapping_error(dev, dma_addr); - return dma_mapping_error(dma_addr); + return dma_mapping_error(dev->dma_device, dma_addr); } /** @@ -1515,6 +2131,24 @@ static inline void ib_dma_unmap_single(struct ib_device *dev, dma_unmap_single(dev->dma_device, addr, size, direction); } +static inline u64 ib_dma_map_single_attrs(struct ib_device *dev, + void *cpu_addr, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_map_single_attrs(dev->dma_device, cpu_addr, size, + direction, attrs); +} + +static inline void ib_dma_unmap_single_attrs(struct ib_device *dev, + u64 addr, size_t size, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_unmap_single_attrs(dev->dma_device, addr, size, + direction, attrs); +} + /** * ib_dma_map_page - Map a physical page to DMA address * @dev: The device for which the dma_addr is to be created @@ -1584,16 +2218,32 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev, dma_unmap_sg(dev->dma_device, sg, nents, direction); } +static inline int ib_dma_map_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + return dma_map_sg_attrs(dev->dma_device, sg, nents, direction, attrs); +} + +static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, attrs); +} /** * ib_sg_dma_address - Return the DMA address from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_address() into sg_dma_address(). */ static inline u64 ib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_address(dev, sg); return sg_dma_address(sg); } @@ -1601,12 +2251,13 @@ static inline u64 ib_sg_dma_address(struct ib_device *dev, * ib_sg_dma_len - Return the DMA length from a scatter/gather entry * @dev: The device for which the DMA addresses were created * @sg: The scatter/gather entry + * + * Note: this function is obsolete. To do: change all occurrences of + * ib_sg_dma_len() into sg_dma_len(). */ static inline unsigned int ib_sg_dma_len(struct ib_device *dev, struct scatterlist *sg) { - if (dev->dma_ops) - return dev->dma_ops->dma_len(dev, sg); return sg_dma_len(sg); } @@ -1744,14 +2395,95 @@ int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); * ib_dereg_mr - Deregisters a memory region and removes it from the * HCA translation table. * @mr: The memory region to deregister. + * + * This function can fail, if the memory region has memory windows bound to it. */ int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_create_mr - Allocates a memory region that may be used for + * signature handover operations. + * @pd: The protection domain associated with the region. + * @mr_init_attr: memory region init attributes. + */ +struct ib_mr *ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); + +/** + * ib_destroy_mr - Destroys a memory region that was created using + * ib_create_mr and removes it from HW translation tables. + * @mr: The memory region to destroy. + * + * This function can fail, if the memory region has memory windows bound to it. + */ +int ib_destroy_mr(struct ib_mr *mr); + +/** + * ib_alloc_fast_reg_mr - Allocates memory region usable with the + * IB_WR_FAST_REG_MR send work request. + * @pd: The protection domain associated with the region. + * @max_page_list_len: requested max physical buffer list length to be + * used with fast register work requests for this MR. + */ +struct ib_mr *ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); + +/** + * ib_alloc_fast_reg_page_list - Allocates a page list array + * @device - ib device pointer. + * @page_list_len - size of the page list array to be allocated. + * + * This allocates and returns a struct ib_fast_reg_page_list * and a + * page_list array that is at least page_list_len in size. The actual + * size is returned in max_page_list_len. The caller is responsible + * for initializing the contents of the page_list array before posting + * a send work request with the IB_WC_FAST_REG_MR opcode. + * + * The page_list array entries must be translated using one of the + * ib_dma_*() functions just like the addresses passed to + * ib_map_phys_fmr(). Once the ib_post_send() is issued, the struct + * ib_fast_reg_page_list must not be modified by the caller until the + * IB_WC_FAST_REG_MR work request completes. + */ +struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list( + struct ib_device *device, int page_list_len); + +/** + * ib_free_fast_reg_page_list - Deallocates a previously allocated + * page list array. + * @page_list - struct ib_fast_reg_page_list pointer to be deallocated. + */ +void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list); + +/** + * ib_update_fast_reg_key - updates the key portion of the fast_reg MR + * R_Key and L_Key. + * @mr - struct ib_mr pointer to be updated. + * @newkey - new key to be used. + */ +static inline void ib_update_fast_reg_key(struct ib_mr *mr, u8 newkey) +{ + mr->lkey = (mr->lkey & 0xffffff00) | newkey; + mr->rkey = (mr->rkey & 0xffffff00) | newkey; +} + +/** + * ib_inc_rkey - increments the key portion of the given rkey. Can be used + * for calculating a new rkey for type 2 memory windows. + * @rkey - the rkey to increment. + */ +static inline u32 ib_inc_rkey(u32 rkey) +{ + const u32 mask = 0x000000ff; + return ((rkey + 1) & mask) | (rkey & ~mask); +} + /** * ib_alloc_mw - Allocates a memory window. * @pd: The protection domain associated with the memory window. + * @type: The type of the memory window (1 or 2). */ -struct ib_mw *ib_alloc_mw(struct ib_pd *pd); +struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); /** * ib_bind_mw - Posts a work request to the send queue of the specified @@ -1761,6 +2493,10 @@ struct ib_mw *ib_alloc_mw(struct ib_pd *pd); * @mw: The memory window to bind. * @mw_bind: Specifies information about the memory window, including * its address range, remote access rights, and associated memory region. + * + * If there is no immediate error, the function will update the rkey member + * of the mw parameter to its new value. The bind operation can still fail + * asynchronously. */ static inline int ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, @@ -1839,4 +2575,48 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); */ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); +/** + * ib_alloc_xrcd - Allocates an XRC domain. + * @device: The device on which to allocate the XRC domain. + */ +struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device); + +/** + * ib_dealloc_xrcd - Deallocates an XRC domain. + * @xrcd: The XRC domain to deallocate. + */ +int ib_dealloc_xrcd(struct ib_xrcd *xrcd); + +struct ib_flow *ib_create_flow(struct ib_qp *qp, + struct ib_flow_attr *flow_attr, int domain); +int ib_destroy_flow(struct ib_flow *flow_id); + +static inline int ib_check_mr_access(int flags) +{ + /* + * Local write permission is required if remote write or + * remote atomic permission is also requested. + */ + if (flags & (IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_REMOTE_WRITE) && + !(flags & IB_ACCESS_LOCAL_WRITE)) + return -EINVAL; + + return 0; +} + +/** + * ib_check_mr_status: lightweight check of MR status. + * This routine may provide status checks on a selected + * ib_mr. first use is for signature status check. + * + * @mr: A memory region. + * @check_mask: Bitmask of which checks to perform from + * ib_mr_status_check enumeration. + * @mr_status: The container of relevant status checks. + * failed checks will be indicated in the status bitmask + * and the relevant info shall be in the error item. + */ +int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, + struct ib_mr_status *mr_status); + #endif /* IB_VERBS_H */ |
