diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb4')
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/Kconfig | 8 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 1565 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/cq.c | 351 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 416 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/ev.c | 8 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/id_table.c | 4 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 116 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/mem.c | 213 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 65 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 366 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/resource.c | 10 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 117 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 15 | ||||
| -rw-r--r-- | drivers/infiniband/hw/cxgb4/user.h | 7 |
14 files changed, 2316 insertions, 945 deletions
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 6b7e6c54353..23f38cf2c5c 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,10 +1,10 @@ config INFINIBAND_CXGB4 - tristate "Chelsio T4 RDMA Driver" - depends on CHELSIO_T4 && INET + tristate "Chelsio T4/T5 RDMA Driver" + depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) select GENERIC_ALLOCATOR ---help--- - This is an iWARP/RDMA driver for the Chelsio T4 1GbE and - 10GbE adapters. + This is an iWARP/RDMA driver for the Chelsio T4 and T5 + 1GbE, 10GbE adapters and T5 40GbE adapter. For general information about Chelsio and our products, visit our website at <http://www.chelsio.com>. diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c13745cde7f..768a0fb67dd 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. + * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -44,6 +44,10 @@ #include <net/netevent.h> #include <net/route.h> #include <net/tcp.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> + +#include <rdma/ib_addr.h> #include "iw_cxgb4.h" @@ -96,9 +100,9 @@ int c4iw_debug; module_param(c4iw_debug, int, 0644); MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); -static int peer2peer; +static int peer2peer = 1; module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; module_param(p2p_type, int, 0644); @@ -143,30 +147,43 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + static void start_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (timer_pending(&ep->timer)) { - PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - c4iw_get_ep(&ep->com); + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; add_timer(&ep->timer); } -static void stop_ep_timer(struct c4iw_ep *ep) +static int stop_ep_timer(struct c4iw_ep *ep) { - PDBG("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! " - "ep %p state %u\n", __func__, ep, ep->com.state); - return; - } + PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - c4iw_put_ep(&ep->com); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + c4iw_put_ep(&ep->com); + return 0; + } + return 1; } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -217,12 +234,16 @@ static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) static void set_emss(struct c4iw_ep *ep, u16 opt) { - ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40; + ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - + sizeof(struct iphdr) - sizeof(struct tcphdr); ep->mss = ep->emss; if (GET_TCPOPT_TSTAMP(opt)) ep->emss -= 12; if (ep->emss < 128) ep->emss = 128; + if (ep->emss & 7) + PDBG("Warning: misaligned mtu idx %u mss %u emss=%u\n", + GET_TCPOPT_MSS(opt), ep->mss, ep->emss); PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt), ep->mss, ep->emss); } @@ -271,11 +292,19 @@ void _c4iw_free_ep(struct kref *kref) ep = container_of(kref, struct c4iw_ep, com.kref); PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); + } + if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) { + print_addr(&ep->com, __func__, "remove_mapinfo/mapping"); + iwpm_remove_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr); + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); } kfree(ep); } @@ -318,22 +347,78 @@ static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp) } else { skb = alloc_skb(len, gfp); } + t4_set_arp_err_handler(skb, NULL, NULL); return skb; } -static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip, +static struct net_device *get_real_dev(struct net_device *egress_dev) +{ + return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; +} + +static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) +{ + int i; + + egress_dev = get_real_dev(egress_dev); + for (i = 0; i < dev->rdev.lldi.nports; i++) + if (dev->rdev.lldi.ports[i] == egress_dev) + return 1; + return 0; +} + +static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, + __u8 *peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, + __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!our_interface(dev, ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } + +out: + return dst; +} + +static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, __be32 peer_ip, __be16 local_port, __be16 peer_port, u8 tos) { struct rtable *rt; struct flowi4 fl4; + struct neighbour *n; rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, tos, 0); if (IS_ERR(rt)) return NULL; - return rt; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; } static void arp_failure_discard(void *handle, struct sk_buff *skb) @@ -347,8 +432,17 @@ static void arp_failure_discard(void *handle, struct sk_buff *skb) */ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb) { + struct c4iw_ep *ep = handle; + printk(KERN_ERR MOD "ARP failure duing connect\n"); kfree_skb(skb); + connect_reply_upcall(ep, -EHOSTUNREACH); + state_set(&ep->com, DEAD); + remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); + cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); + dst_release(ep->dst); + cxgb4_l2t_release(ep->l2t); + c4iw_put_ep(&ep->com); } /* @@ -392,7 +486,7 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb) flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq); flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; - flowc->mnemval[6].val = cpu_to_be32(snd_win); + flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); /* Pad WR to 16 byte boundary */ @@ -452,59 +546,80 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } -#define VLAN_NONE 0xfff -#define FILTER_SEL_VLAN_NONE 0xffff -#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */ -#define FILTER_SEL_WIDTH_VIN_P_FC \ - (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/ -#define FILTER_SEL_WIDTH_TAG_P_FC \ - (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */ -#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC) +/* + * c4iw_form_pm_msg - Form a port mapper message with mapping info + */ +static void c4iw_form_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) +{ + memcpy(&pm_msg->loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&pm_msg->rem_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); +} + +/* + * c4iw_form_reg_msg - Form a port mapper message with dev info + */ +static void c4iw_form_reg_msg(struct c4iw_dev *dev, + struct iwpm_dev_data *pm_msg) +{ + memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE); + memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name, + IWPM_IFNAME_SIZE); +} -static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst, - struct l2t_entry *l2t) +static void c4iw_record_pm_msg(struct c4iw_ep *ep, + struct iwpm_sa_data *pm_msg) { - unsigned int ntuple = 0; - u32 viid; + memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr, + sizeof(ep->com.mapped_remote_addr)); +} - switch (dev->rdev.lldi.filt_mode) { +static void best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts) +{ + unsigned short hdr_size = sizeof(struct iphdr) + + sizeof(struct tcphdr) + + (use_ts ? 12 : 0); + unsigned short data_size = mtu - hdr_size; - /* default filter mode */ - case HW_TPL_FR_MT_PR_IV_P_FC: - if (l2t->vlan == VLAN_NONE) - ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC; - else { - ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC; - ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC; - } - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - case HW_TPL_FR_MT_PR_OV_P_FC: { - viid = cxgb4_port_viid(l2t->neigh->dev); - - ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC; - ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC; - ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC; - ntuple |= l2t->lport << S_PORT | IPPROTO_TCP << - FILTER_SEL_WIDTH_VLD_TAG_P_FC; - break; - } - default: - break; - } - return ntuple; + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); } static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req; + struct cpl_t5_act_open_req *t5_req; + struct cpl_act_open_req6 *req6; + struct cpl_t5_act_open_req6 *t5_req6; struct sk_buff *skb; u64 opt0; u32 opt2; unsigned int mtu_idx; int wscale; - int wrlen = roundup(sizeof *req, 16); + int wrlen; + int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? + sizeof(struct cpl_act_open_req) : + sizeof(struct cpl_t5_act_open_req); + int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ? + sizeof(struct cpl_act_open_req6) : + sizeof(struct cpl_t5_act_open_req6); + struct sockaddr_in *la = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + struct sockaddr_in *ra = (struct sockaddr_in *) + &ep->com.mapped_remote_addr; + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *) + &ep->com.mapped_remote_addr; + int win; + + wrlen = (ep->com.remote_addr.ss_family == AF_INET) ? + roundup(sizev4, 16) : + roundup(sizev6, 16); PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid); @@ -516,8 +631,18 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + opt0 = (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -528,7 +653,7 @@ static int send_connect(struct c4iw_ep *ep) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win>>10); + RCV_BUFSIZ(win); opt2 = RX_CHANNEL(0) | CCTRL_ECN(enable_ecn) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); @@ -538,19 +663,108 @@ static int send_connect(struct c4iw_ep *ep) opt2 |= SACK_EN(1); if (wscale && enable_tcp_window_scaling) opt2 |= WND_SCALE_EN(1); - t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure); - - req = (struct cpl_act_open_req *) skb_put(skb, wrlen); - INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32( - MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid))); - req->local_port = ep->com.local_addr.sin_port; - req->peer_port = ep->com.remote_addr.sin_port; - req->local_ip = ep->com.local_addr.sin_addr.s_addr; - req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; - req->opt0 = cpu_to_be64(opt0); - req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); - req->opt2 = cpu_to_be32(opt2); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + } + t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure); + + if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) { + if (ep->com.remote_addr.ss_family == AF_INET) { + req = (struct cpl_act_open_req *) skb_put(skb, wrlen); + INIT_TP_WR(req, 0); + OPCODE_TID(req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + req->local_port = la->sin_port; + req->peer_port = ra->sin_port; + req->local_ip = la->sin_addr.s_addr; + req->peer_ip = ra->sin_addr.s_addr; + req->opt0 = cpu_to_be64(opt0); + req->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + req->opt2 = cpu_to_be32(opt2); + } else { + req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen); + + INIT_TP_WR(req6, 0); + OPCODE_TID(req6) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + req6->local_port = la6->sin6_port; + req6->peer_port = ra6->sin6_port; + req6->local_ip_hi = *((__be64 *) + (la6->sin6_addr.s6_addr)); + req6->local_ip_lo = *((__be64 *) + (la6->sin6_addr.s6_addr + 8)); + req6->peer_ip_hi = *((__be64 *) + (ra6->sin6_addr.s6_addr)); + req6->peer_ip_lo = *((__be64 *) + (ra6->sin6_addr.s6_addr + 8)); + req6->opt0 = cpu_to_be64(opt0); + req6->params = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + req6->opt2 = cpu_to_be32(opt2); + } + } else { + u32 isn = (prandom_u32() & ~7UL) - 1; + + opt2 |= T5_OPT_2_VALID; + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + if (peer2peer) + isn += 4; + + if (ep->com.remote_addr.ss_family == AF_INET) { + t5_req = (struct cpl_t5_act_open_req *) + skb_put(skb, wrlen); + INIT_TP_WR(t5_req, 0); + OPCODE_TID(t5_req) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + ((ep->rss_qid << 14) | ep->atid))); + t5_req->local_port = la->sin_port; + t5_req->peer_port = ra->sin_port; + t5_req->local_ip = la->sin_addr.s_addr; + t5_req->peer_ip = ra->sin_addr.s_addr; + t5_req->opt0 = cpu_to_be64(opt0); + t5_req->params = cpu_to_be64(V_FILTER_TUPLE( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t))); + t5_req->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req->rsvd)); + t5_req->opt2 = cpu_to_be32(opt2); + } else { + t5_req6 = (struct cpl_t5_act_open_req6 *) + skb_put(skb, wrlen); + INIT_TP_WR(t5_req6, 0); + OPCODE_TID(t5_req6) = cpu_to_be32( + MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + ((ep->rss_qid<<14)|ep->atid))); + t5_req6->local_port = la6->sin6_port; + t5_req6->peer_port = ra6->sin6_port; + t5_req6->local_ip_hi = *((__be64 *) + (la6->sin6_addr.s6_addr)); + t5_req6->local_ip_lo = *((__be64 *) + (la6->sin6_addr.s6_addr + 8)); + t5_req6->peer_ip_hi = *((__be64 *) + (ra6->sin6_addr.s6_addr)); + t5_req6->peer_ip_lo = *((__be64 *) + (ra6->sin6_addr.s6_addr + 8)); + t5_req6->opt0 = cpu_to_be64(opt0); + t5_req6->params = (__force __be64)cpu_to_be32( + cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], + ep->l2t)); + t5_req6->rsvd = cpu_to_be32(isn); + PDBG("%s snd_isn %u\n", __func__, + be32_to_cpu(t5_req6->rsvd)); + t5_req6->opt2 = cpu_to_be32(opt2); + } + } + set_bit(ACT_OPEN_REQ, &ep->com.history); return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -642,8 +856,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, ep->mpa_skb = skb; c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); + __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; return; } @@ -687,7 +902,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; + mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { @@ -723,6 +938,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -806,7 +1022,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); + __state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -823,6 +1040,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + mutex_lock(&ep->com.mutex); dst_confirm(ep->dst); /* setup the hwtid for this connection */ @@ -846,24 +1064,24 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); - + mutex_unlock(&ep->com.mutex); return 0; } -static void close_complete_upcall(struct c4iw_ep *ep) +static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; + event.status = status; if (ep->com.cm_id) { PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -871,8 +1089,7 @@ static void close_complete_upcall(struct c4iw_ep *ep) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - close_complete_upcall(ep); - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); } @@ -906,7 +1123,6 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -919,8 +1135,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REPLY; event.status = status; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); if ((status == 0) || (status == -ECONNREFUSED)) { if (!ep->tried_with_mpa_v1) { @@ -946,19 +1164,21 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; } } -static void connect_request_upcall(struct c4iw_ep *ep) +static int connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; + int ret; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CONNECT_REQUEST; - event.local_addr = ep->com.local_addr; - event.remote_addr = ep->com.remote_addr; + memcpy(&event.local_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + memcpy(&event.remote_addr, &ep->com.remote_addr, + sizeof(ep->com.remote_addr)); event.provider_data = ep; if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ @@ -975,15 +1195,14 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (state_read(&ep->parent_ep->com) != DEAD) { - c4iw_get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; + return ret; } static void established_upcall(struct c4iw_ep *ep) @@ -1015,6 +1234,14 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return 0; } + /* + * If we couldn't specify the entire rcv window at connection setup + * due to the limit in the number of bits in the RCV_BUFSIZ field, + * then add the overage in to the credits returned. + */ + if (ep->rcv_win > RCV_BUFSIZ_MASK * 1024) + credits += ep->rcv_win - RCV_BUFSIZ_MASK * 1024; + req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); memset(req, 0, wrlen); INIT_TP_WR(req, ep->hwtid); @@ -1028,7 +1255,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) return credits; } -static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) +static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) { struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params; @@ -1038,17 +1265,17 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; + int disconnect = 0; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); /* - * Stop mpa timer. If it expired, then the state has - * changed and we bail since ep_timeout already aborted - * the connection. + * Stop mpa timer. If it expired, then + * we ignore the MPA reply. process_timeout() + * will abort the connection. */ - stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) - return; + if (stop_ep_timer(ep)) + return 0; /* * If we get more than the supported amount of private data @@ -1070,7 +1297,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * if we don't even have the mpa message, then bail. */ if (ep->mpa_pkt_len < sizeof(*mpa)) - return; + return 0; mpa = (struct mpa_message *) ep->mpa_pkt; /* Validate MPA header. */ @@ -1110,7 +1337,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * We'll continue process when more data arrives. */ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) - return; + return 0; if (mpa->flags & MPA_REJECT) { err = -ECONNREFUSED; @@ -1122,7 +1349,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1212,9 +1439,11 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_NOMATCH_RTR; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } @@ -1230,18 +1459,20 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) attrs.layer_etype = LAYER_MPA | DDP_LLP; attrs.ecode = MPA_INSUFF_IRD; attrs.next_state = C4IW_QP_STATE_TERMINATE; + attrs.send_term = 1; err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); err = -ENOMEM; + disconnect = 1; goto out; } goto out; err: - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); - return; + return disconnect; } static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) @@ -1252,15 +1483,12 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) != MPA_REQ_WAIT) - return; - /* * If we get more than the supported amount of private data * then we must fail this connection. */ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1282,7 +1510,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) return; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - stop_ep_timer(ep); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1291,11 +1518,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1306,6 +1535,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1314,6 +1544,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + (void)stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1370,10 +1601,24 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - state_set(&ep->com, MPA_REQ_RCVD); - - /* drive upcall */ - connect_request_upcall(ep); + /* + * If the endpoint timer already expired, then we ignore + * the start request. process_timeout() will abort + * the connection. + */ + if (!stop_ep_timer(ep)) { + __state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); + } return; } @@ -1385,38 +1630,48 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(hdr); struct tid_info *t = dev->rdev.lldi.tids; __u8 status = hdr->status; + int disconnect = 0; ep = lookup_tid(t, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); - - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); + mutex_lock(&ep->com.mutex); /* update RX credits */ update_rx_credits(ep, dlen); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: - process_mpa_reply(ep, skb); + ep->rcv_seq += dlen; + disconnect = process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + ep->rcv_seq += dlen; process_mpa_request(ep, skb); break; - case MPA_REP_SENT: + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + BUG_ON(!ep->com.qp); + if (status) + pr_err("%s Unexpected streaming data." \ + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + ep->com.state, ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + disconnect = 1; break; + } default: - pr_err("%s Unexpected streaming data." \ - " ep %p state %d tid %u status %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid, status); - - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ break; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + c4iw_ep_disconnect(ep, 0, GFP_KERNEL); return 0; } @@ -1437,6 +1692,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; @@ -1458,28 +1714,43 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct fw_ofld_connection_wr *req; unsigned int mtu_idx; int wscale; + struct sockaddr_in *sin; + int win; skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); - req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, + req->le.filter = cpu_to_be32(cxgb4_select_ntuple( + ep->com.dev->rdev.lldi.ports[0], ep->l2t)); - req->le.lport = ep->com.local_addr.sin_port; - req->le.pport = ep->com.remote_addr.sin_port; - req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr; - req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr; + sin = (struct sockaddr_in *)&ep->com.mapped_local_addr; + req->le.lport = sin->sin_port; + req->le.u.ipv4.lip = sin->sin_addr.s_addr; + sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + req->le.pport = sin->sin_port; + req->le.u.ipv4.pip = sin->sin_addr.s_addr; req->tcb.t_state_to_astid = htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) | V_FW_OFLD_CONNECTION_WR_ASTID(atid)); req->tcb.cplrxdataack_cplpassacceptrpl = htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); - req->tcb.tx_max = jiffies; + req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps); wscale = compute_wscale(rcv_win); - req->tcb.opt0 = TCAM_BYPASS(1) | + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; + + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -1490,20 +1761,20 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win >> 10); - req->tcb.opt2 = PACE(1) | + RCV_BUFSIZ(win)); + req->tcb.opt2 = (__force __be32) (PACE(1) | TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | RX_CHANNEL(0) | CCTRL_ECN(enable_ecn) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); if (enable_tcp_timestamps) - req->tcb.opt2 |= TSTAMPS_EN(1); + req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); if (enable_tcp_sack) - req->tcb.opt2 |= SACK_EN(1); + req->tcb.opt2 |= (__force __be32) SACK_EN(1); if (wscale && enable_tcp_window_scaling) - req->tcb.opt2 |= WND_SCALE_EN(1); - req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); - req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); set_bit(ACT_OFLD_CONN, &ep->com.history); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); @@ -1518,16 +1789,118 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_ARP_MISS; } +/* Returns whether a CPL status conveys negative advice. + */ +static int is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + +static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi) +{ + ep->snd_win = snd_win; + ep->rcv_win = rcv_win; + PDBG("%s snd_win %d rcv_win %d\n", __func__, ep->snd_win, ep->rcv_win); +} + #define ACT_OPEN_RETRY_COUNT 2 +static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, + struct dst_entry *dst, struct c4iw_dev *cdev, + bool clear_mpa_v1) +{ + struct neighbour *n; + int err, step; + struct net_device *pdev; + + n = dst_neigh_lookup(dst, peer_ip); + if (!n) + return -ENODEV; + + rcu_read_lock(); + err = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + if (iptype == 4) + pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip); + else if (IS_ENABLED(CONFIG_IPV6)) + for_each_netdev(&init_net, pdev) { + if (ipv6_chk_addr(&init_net, + (struct in6_addr *)peer_ip, + pdev, 1)) + break; + } + else + pdev = NULL; + + if (!pdev) { + err = -ENODEV; + goto out; + } + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + dev_put(pdev); + } else { + pdev = get_real_dev(n->dev); + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = dst_mtu(dst); + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + set_tcp_window(ep, (struct port_info *)netdev_priv(pdev)); + + if (clear_mpa_v1) { + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; + } + } + err = 0; +out: + rcu_read_unlock(); + + neigh_release(n); + + return err; +} + static int c4iw_reconnect(struct c4iw_ep *ep) { int err = 0; - struct rtable *rt; - struct port_info *pi; - struct net_device *pdev; - int step; - struct neighbour *neigh; + struct sockaddr_in *laddr = (struct sockaddr_in *) + &ep->com.cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *) + &ep->com.cm_id->remote_addr; + struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->local_addr; + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *) + &ep->com.cm_id->remote_addr; + int iptype; + __u8 *ra; PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); init_timer(&ep->timer); @@ -1544,51 +1917,28 @@ static int c4iw_reconnect(struct c4iw_ep *ep) insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid); /* find a route */ - rt = find_route(ep->com.dev, - ep->com.cm_id->local_addr.sin_addr.s_addr, - ep->com.cm_id->remote_addr.sin_addr.s_addr, - ep->com.cm_id->local_addr.sin_port, - ep->com.cm_id->remote_addr.sin_port, 0); - if (!rt) { + if (ep->com.cm_id->local_addr.ss_family == AF_INET) { + ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, 0); + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + } else { + ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, raddr6->sin6_port, 0, + raddr6->sin6_scope_id); + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + } + if (!ep->dst) { pr_err("%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; goto fail3; } - ep->dst = &rt->dst; - - neigh = dst_neigh_lookup(ep->dst, - &ep->com.cm_id->remote_addr.sin_addr.s_addr); - /* get a l2t entry */ - if (neigh->dev->flags & IFF_LOOPBACK) { - PDBG("%s LOOPBACK\n", __func__); - pdev = ip_dev_find(&init_net, - ep->com.cm_id->remote_addr.sin_addr.s_addr); - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, pdev, 0); - pi = (struct port_info *)netdev_priv(pdev); - ep->mtu = pdev->mtu; - ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; - dev_put(pdev); - } else { - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, neigh->dev, 0); - pi = (struct port_info *)netdev_priv(neigh->dev); - ep->mtu = dst_mtu(ep->dst); - ep->tx_chan = cxgb4_port_chan(neigh->dev); - ep->smac_idx = (cxgb4_port_viid(neigh->dev) & - 0x7F) << 1; - } - - step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = pi->port_id * step; - ep->ctrlq_idx = pi->port_id; - step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan; - ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step]; - - if (!ep->l2t) { + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false); + if (err) { pr_err("%s - cannot alloc l2e.\n", __func__); - err = -ENOMEM; goto fail4; } @@ -1631,13 +1981,21 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(rpl->atid_status))); struct tid_info *t = dev->rdev.lldi.tids; int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status)); + struct sockaddr_in *la; + struct sockaddr_in *ra; + struct sockaddr_in6 *la6; + struct sockaddr_in6 *ra6; ep = lookup_atid(t, atid); + la = (struct sockaddr_in *)&ep->com.mapped_local_addr; + ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr; PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (status == CPL_ERR_RTX_NEG_ADVICE) { + if (is_neg_adv(status)) { printk(KERN_WARNING MOD "Connection problems for atid %u\n", atid); return 0; @@ -1653,10 +2011,11 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) case CPL_ERR_CONN_TIMEDOUT: break; case CPL_ERR_TCAM_FULL: - if (dev->rdev.lldi.enable_fw_ofld_conn) { - mutex_lock(&dev->rdev.stats.lock); - dev->rdev.stats.tcam_full++; - mutex_unlock(&dev->rdev.stats.lock); + mutex_lock(&dev->rdev.stats.lock); + dev->rdev.stats.tcam_full++; + mutex_unlock(&dev->rdev.stats.lock); + if (ep->com.local_addr.ss_family == AF_INET && + dev->rdev.lldi.enable_fw_ofld_conn) { send_fw_act_open_req(ep, GET_TID_TID(GET_AOPEN_ATID( ntohl(rpl->atid_status)))); @@ -1676,13 +2035,17 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) } break; default: - printk(KERN_INFO MOD "Active open failure - " - "atid %u status %u errno %d %pI4:%u->%pI4:%u\n", - atid, status, status2errno(status), - &ep->com.local_addr.sin_addr.s_addr, - ntohs(ep->com.local_addr.sin_port), - &ep->com.remote_addr.sin_addr.s_addr, - ntohs(ep->com.remote_addr.sin_port)); + if (ep->com.local_addr.ss_family == AF_INET) { + pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n", + atid, status, status2errno(status), + &la->sin_addr.s_addr, ntohs(la->sin_port), + &ra->sin_addr.s_addr, ntohs(ra->sin_port)); + } else { + pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n", + atid, status, status2errno(status), + la6->sin6_addr.s6_addr, ntohs(la6->sin6_port), + ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port)); + } break; } @@ -1720,27 +2083,6 @@ out: return 0; } -static int listen_stop(struct c4iw_listen_ep *ep) -{ - struct sk_buff *skb; - struct cpl_close_listsvr_req *req; - - PDBG("%s ep %p\n", __func__, ep); - skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); - if (!skb) { - printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__); - return -ENOMEM; - } - req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, 0); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, - ep->stid)); - req->reply_ctrl = cpu_to_be16( - QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0])); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); - return c4iw_ofld_send(&ep->com.dev->rdev, skb); -} - static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_close_listsvr_rpl *rpl = cplhdr(skb); @@ -1753,7 +2095,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, +static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, struct cpl_pass_accept_req *req) { struct cpl_pass_accept_rpl *rpl; @@ -1761,13 +2103,36 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, u64 opt0; u32 opt2; int wscale; + struct cpl_t5_pass_accept_rpl *rpl5 = NULL; + int win; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); BUG_ON(skb_cloned(skb)); - skb_trim(skb, sizeof(*rpl)); + skb_get(skb); - cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); + rpl = cplhdr(skb); + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + skb_trim(skb, roundup(sizeof(*rpl5), 16)); + rpl5 = (void *)rpl; + INIT_TP_WR(rpl5, ep->hwtid); + } else { + skb_trim(skb, sizeof(*rpl)); + INIT_TP_WR(rpl, ep->hwtid); + } + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, + ep->hwtid)); + + best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp); wscale = compute_wscale(rcv_win); + + /* + * Specify the largest window that will fit in opt0. The + * remainder will be specified in the rx_data_ack. + */ + win = ep->rcv_win >> 10; + if (win > RCV_BUFSIZ_MASK) + win = RCV_BUFSIZ_MASK; opt0 = (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -1778,7 +2143,7 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, SMAC_SEL(ep->smac_idx) | DSCP(ep->tos >> 2) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win>>10); + RCV_BUFSIZ(win); opt2 = RX_CHANNEL(0) | RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); @@ -1797,120 +2162,69 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb, if (tcph->ece && tcph->cwr) opt2 |= CCTRL_ECN(1); } + if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) { + u32 isn = (prandom_u32() & ~7UL) - 1; + opt2 |= T5_OPT_2_VALID; + opt2 |= V_CONG_CNTRL(CONG_ALG_TAHOE); + opt2 |= CONG_CNTRL_VALID; /* OPT_2_ISS for T5 */ + rpl5 = (void *)rpl; + memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16)); + if (peer2peer) + isn += 4; + rpl5->iss = cpu_to_be32(isn); + PDBG("%s iss %u\n", __func__, be32_to_cpu(rpl5->iss)); + } - rpl = cplhdr(skb); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, - ep->hwtid)); rpl->opt0 = cpu_to_be64(opt0); rpl->opt2 = cpu_to_be32(opt2); set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); + t4_set_arp_err_handler(skb, NULL, arp_failure_discard); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); return; } -static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip, - struct sk_buff *skb) +static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) { - PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid, - peer_ip); + PDBG("%s c4iw_dev %p tid %u\n", __func__, dev, hwtid); BUG_ON(skb_cloned(skb)); skb_trim(skb, sizeof(struct cpl_tid_release)); - skb_get(skb); release_tid(&dev->rdev, hwtid, skb); return; } -static void get_4tuple(struct cpl_pass_accept_req *req, - __be32 *local_ip, __be32 *peer_ip, +static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype, + __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, __be16 *peer_port) { int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len)); int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len)); struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); struct tcphdr *tcp = (struct tcphdr *) ((u8 *)(req + 1) + eth_len + ip_len); - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - - *peer_ip = ip->saddr; - *local_ip = ip->daddr; + if (ip->version == 4) { + PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, + ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else { + PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, + ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), + ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } *peer_port = tcp->source; *local_port = tcp->dest; return; } -static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, - struct c4iw_dev *cdev, bool clear_mpa_v1) -{ - struct neighbour *n; - int err, step; - - n = dst_neigh_lookup(dst, &peer_ip); - if (!n) - return -ENODEV; - - rcu_read_lock(); - err = -ENOMEM; - if (n->dev->flags & IFF_LOOPBACK) { - struct net_device *pdev; - - pdev = ip_dev_find(&init_net, peer_ip); - if (!pdev) { - err = -ENODEV; - goto out; - } - ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); - if (!ep->l2t) - goto out; - ep->mtu = pdev->mtu; - ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; - step = cdev->rdev.lldi.ntxq / - cdev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(pdev) * step; - step = cdev->rdev.lldi.nrxq / - cdev->rdev.lldi.nchan; - ep->ctrlq_idx = cxgb4_port_idx(pdev); - ep->rss_qid = cdev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(pdev) * step]; - dev_put(pdev); - } else { - ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, n->dev, 0); - if (!ep->l2t) - goto out; - ep->mtu = dst_mtu(dst); - ep->tx_chan = cxgb4_port_chan(n->dev); - ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; - step = cdev->rdev.lldi.ntxq / - cdev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(n->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(n->dev); - step = cdev->rdev.lldi.nrxq / - cdev->rdev.lldi.nchan; - ep->rss_qid = cdev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(n->dev) * step]; - - if (clear_mpa_v1) { - ep->retry_with_mpa_v1 = 0; - ep->tried_with_mpa_v1 = 0; - } - } - err = 0; -out: - rcu_read_unlock(); - - neigh_release(n); - - return err; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -1919,23 +2233,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); struct dst_entry *dst; - struct rtable *rt; - __be32 local_ip, peer_ip = 0; + __u8 local_ip[16], peer_ip[16]; __be16 local_port, peer_port; int err; u16 peer_mss = ntohs(req->tcpopt.mss); + int iptype; + unsigned short hdrs; parent_ep = lookup_stid(t, stid); if (!parent_ep) { PDBG("%s connect request on invalid stid %d\n", __func__, stid); goto reject; } - get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port); - - PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ - "rport %d peer_mss %d\n", __func__, parent_ep, hwtid, - ntohl(local_ip), ntohl(peer_ip), ntohs(local_port), - ntohs(peer_port), peer_mss); if (state_read(&parent_ep->com) != LISTEN) { printk(KERN_ERR "%s - listening ep not in LISTEN\n", @@ -1943,15 +2252,32 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } + get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port); + /* Find output route */ - rt = find_route(dev, local_ip, peer_ip, local_port, peer_port, - GET_POPEN_TOS(ntohl(req->tos_stid))); - if (!rt) { + if (iptype == 4) { + PDBG("%s parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n" + , __func__, parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, + GET_POPEN_TOS(ntohl(req->tos_stid))); + } else { + PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" + , __func__, parent_ep, hwtid, + local_ip, peer_ip, ntohs(local_port), + ntohs(peer_port), peer_mss); + dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); + } + if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", __func__); goto reject; } - dst = &rt->dst; child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); if (!child_ep) { @@ -1961,7 +2287,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - err = import_ep(child_ep, peer_ip, dst, dev, false); + err = import_ep(child_ep, iptype, peer_ip, dst, dev, false); if (err) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1970,18 +2296,35 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - if (peer_mss && child_ep->mtu > (peer_mss + 40)) - child_ep->mtu = peer_mss + 40; + hdrs = sizeof(struct iphdr) + sizeof(struct tcphdr) + + ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0); + if (peer_mss && child_ep->mtu > (peer_mss + hdrs)) + child_ep->mtu = peer_mss + hdrs; state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; - child_ep->com.local_addr.sin_family = PF_INET; - child_ep->com.local_addr.sin_port = local_port; - child_ep->com.local_addr.sin_addr.s_addr = local_ip; - child_ep->com.remote_addr.sin_family = PF_INET; - child_ep->com.remote_addr.sin_port = peer_port; - child_ep->com.remote_addr.sin_addr.s_addr = peer_ip; + if (iptype == 4) { + struct sockaddr_in *sin = (struct sockaddr_in *) + &child_ep->com.local_addr; + sin->sin_family = PF_INET; + sin->sin_port = local_port; + sin->sin_addr.s_addr = *(__be32 *)local_ip; + sin = (struct sockaddr_in *)&child_ep->com.remote_addr; + sin->sin_family = PF_INET; + sin->sin_port = peer_port; + sin->sin_addr.s_addr = *(__be32 *)peer_ip; + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &child_ep->com.local_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = local_port; + memcpy(sin6->sin6_addr.s6_addr, local_ip, 16); + sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr; + sin6->sin6_family = PF_INET6; + sin6->sin6_port = peer_port; + memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16); + } c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid)); @@ -1993,11 +2336,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); - accept_cr(child_ep, peer_ip, skb, req); + insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); + accept_cr(child_ep, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; reject: - reject_cr(dev, hwtid, peer_ip, skb); + reject_cr(dev, hwtid, skb); out: return 0; } @@ -2018,7 +2362,6 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(req->tcp_opt)); set_emss(ep, ntohs(req->tcp_opt)); - insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); @@ -2090,13 +2433,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) disconnect = 0; break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; @@ -2115,15 +2458,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } -/* - * Returns whether an ABORT_REQ_RSS message is a negative advice. - */ -static int is_neg_adv_abort(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE; -} - static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); @@ -2137,7 +2471,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int tid = GET_TID(req); ep = lookup_tid(t, tid); - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); return 0; @@ -2159,11 +2493,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) case CONNECTING: break; case MPA_REQ_WAIT: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); break; case MPA_REQ_SENT: - stop_ep_timer(ep); - if (mpa_rev == 2 && ep->tried_with_mpa_v1) + (void)stop_ep_timer(ep); + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -2235,9 +2569,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) out: if (release) release_ep_resources(ep); - - /* retry with mpa-v1 */ - if (ep && ep->retry_with_mpa_v1) { + else if (ep->retry_with_mpa_v1) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); @@ -2268,7 +2601,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) __state_set(&ep->com, MORIBUND); break; case MORIBUND: - stop_ep_timer(ep); + (void)stop_ep_timer(ep); if ((ep->com.cm_id) && (ep->com.qp)) { attrs.next_state = C4IW_QP_STATE_IDLE; c4iw_modify_qp(ep->com.qp->rhp, @@ -2276,7 +2609,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; break; @@ -2351,22 +2684,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; + int err = 0; + int disconnect = 0; struct c4iw_ep *ep = to_ep(cm_id); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + disconnect = 1; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2381,12 +2720,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { err = -ECONNRESET; goto err; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2430,6 +2771,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; + ref_qp(ep); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -2454,25 +2796,95 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err1; - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; - ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return err; } +static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in_device *ind; + int found = 0; + struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr; + struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr; + + ind = in_dev_get(dev->rdev.lldi.ports[0]); + if (!ind) + return -EADDRNOTAVAIL; + for_primary_ifa(ind) { + laddr->sin_addr.s_addr = ifa->ifa_address; + raddr->sin_addr.s_addr = ifa->ifa_address; + found = 1; + break; + } + endfor_ifa(ind); + in_dev_put(ind); + return found ? 0 : -EADDRNOTAVAIL; +} + +static int get_lladdr(struct net_device *dev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_dev *idev; + int err = -EADDRNOTAVAIL; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev != NULL) { + struct inet6_ifaddr *ifp; + + read_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + memcpy(addr, &ifp->addr, 16); + err = 0; + break; + } + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + return err; +} + +static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id) +{ + struct in6_addr uninitialized_var(addr); + struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr; + struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr; + + if (get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) { + memcpy(la6->sin6_addr.s6_addr, &addr, 16); + memcpy(ra6->sin6_addr.s6_addr, &addr, 16); + return 0; + } + return -EADDRNOTAVAIL; +} + int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep; - struct rtable *rt; int err = 0; + struct sockaddr_in *laddr; + struct sockaddr_in *raddr; + struct sockaddr_in6 *laddr6; + struct sockaddr_in6 *raddr6; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + __u8 *ra; + int iptype; + int iwpm_err = 0; if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { @@ -2500,7 +2912,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.dev = dev; ep->com.cm_id = cm_id; ep->com.qp = get_qhp(dev, conn_param->qpn); - BUG_ON(!ep->com.qp); + if (!ep->com.qp) { + PDBG("%s qpn 0x%x not found!\n", __func__, conn_param->qpn); + err = -EINVAL; + goto fail1; + } + ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, ep->com.qp, cm_id); @@ -2511,34 +2928,103 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->atid == -1) { printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__); err = -ENOMEM; - goto fail2; + goto fail1; } insert_handle(dev, &dev->atid_idr, ep, ep->atid); - PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, - ntohl(cm_id->local_addr.sin_addr.s_addr), - ntohs(cm_id->local_addr.sin_port), - ntohl(cm_id->remote_addr.sin_addr.s_addr), - ntohs(cm_id->remote_addr.sin_port)); + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); + memcpy(&ep->com.remote_addr, &cm_id->remote_addr, + sizeof(ep->com.remote_addr)); + + /* No port mapper available, go with the specified peer information */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr, + sizeof(ep->com.mapped_remote_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + c4iw_form_pm_msg(ep, &pm_msg); + iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + c4iw_record_pm_msg(ep, &pm_msg); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW); + err = -ENOMEM; + goto fail1; + } + print_addr(&ep->com, __func__, "add_query/create_mapinfo"); + set_bit(RELEASE_MAPINFO, &ep->com.flags); + + laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr; + raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr; + laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr; + raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr; - /* find a route */ - rt = find_route(dev, - cm_id->local_addr.sin_addr.s_addr, - cm_id->remote_addr.sin_addr.s_addr, - cm_id->local_addr.sin_port, - cm_id->remote_addr.sin_port, 0); - if (!rt) { + if (cm_id->remote_addr.ss_family == AF_INET) { + iptype = 4; + ra = (__u8 *)&raddr->sin_addr; + + /* + * Handle loopback requests to INADDR_ANY. + */ + if ((__force int)raddr->sin_addr.s_addr == INADDR_ANY) { + err = pick_local_ipaddrs(dev, cm_id); + if (err) + goto fail1; + } + + /* find a route */ + PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", + __func__, &laddr->sin_addr, ntohs(laddr->sin_port), + ra, ntohs(raddr->sin_port)); + ep->dst = find_route(dev, laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, laddr->sin_port, + raddr->sin_port, 0); + } else { + iptype = 6; + ra = (__u8 *)&raddr6->sin6_addr; + + /* + * Handle loopback requests to INADDR_ANY. + */ + if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) { + err = pick_local_ip6addrs(dev, cm_id); + if (err) + goto fail1; + } + + /* find a route */ + PDBG("%s saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n", + __func__, laddr6->sin6_addr.s6_addr, + ntohs(laddr6->sin6_port), + raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); + ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, raddr6->sin6_port, 0, + raddr6->sin6_scope_id); + } + if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); err = -EHOSTUNREACH; - goto fail3; + goto fail2; } - ep->dst = &rt->dst; - err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr, - ep->dst, ep->com.dev, true); + err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true); if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - goto fail4; + goto fail3; } PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n", @@ -2547,8 +3033,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) state_set(&ep->com, CONNECTING); ep->tos = 0; - ep->com.local_addr = cm_id->local_addr; - ep->com.remote_addr = cm_id->remote_addr; /* send connect request to rnic */ err = send_connect(ep); @@ -2556,23 +3040,82 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto out; cxgb4_l2t_release(ep->l2t); -fail4: - dst_release(ep->dst); fail3: + dst_release(ep->dst); +fail2: remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid); cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); -fail2: +fail1: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); out: return err; } +static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0], + ep->stid, &sin6->sin6_addr, + sin6->sin6_port, + ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + if (err) + pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n", + err, ep->stid, + sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port)); + return err; +} + +static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep) +{ + int err; + struct sockaddr_in *sin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + + if (dev->rdev.lldi.enable_fw_ofld_conn) { + do { + err = cxgb4_create_server_filter( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + sin->sin_addr.s_addr, sin->sin_port, 0, + ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0); + if (err == -EBUSY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(usecs_to_jiffies(100)); + } + } while (err == -EBUSY); + } else { + c4iw_init_wr_wait(&ep->com.wr_wait); + err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], + ep->stid, sin->sin_addr.s_addr, sin->sin_port, + 0, ep->com.dev->rdev.lldi.rxq_ids[0]); + if (!err) + err = c4iw_wait_for_reply(&ep->com.dev->rdev, + &ep->com.wr_wait, + 0, 0, __func__); + } + if (err) + pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n" + , err, ep->stid, + &sin->sin_addr, ntohs(sin->sin_port)); + return err; +} + int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) { int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_listen_ep *ep; + struct iwpm_dev_data pm_reg_msg; + struct iwpm_sa_data pm_msg; + int iwpm_err = 0; might_sleep(); @@ -2587,15 +3130,19 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) ep->com.cm_id = cm_id; ep->com.dev = dev; ep->backlog = backlog; - ep->com.local_addr = cm_id->local_addr; + memcpy(&ep->com.local_addr, &cm_id->local_addr, + sizeof(ep->com.local_addr)); /* * Allocate a server TID. */ - if (dev->rdev.lldi.enable_fw_ofld_conn) - ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep); + if (dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) + ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, + cm_id->local_addr.ss_family, ep); else - ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep); + ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, + cm_id->local_addr.ss_family, ep); if (ep->stid == -1) { printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); @@ -2603,44 +3150,50 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) goto fail2; } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - state_set(&ep->com, LISTEN); - if (dev->rdev.lldi.enable_fw_ofld_conn) { - do { - err = cxgb4_create_server_filter( - ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - 0, - ep->com.dev->rdev.lldi.rxq_ids[0], - 0, - 0); - if (err == -EBUSY) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(100)); - } - } while (err == -EBUSY); - } else { - c4iw_init_wr_wait(&ep->com.wr_wait); - err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], - ep->stid, ep->com.local_addr.sin_addr.s_addr, - ep->com.local_addr.sin_port, - 0, - ep->com.dev->rdev.lldi.rxq_ids[0]); - if (!err) - err = c4iw_wait_for_reply(&ep->com.dev->rdev, - &ep->com.wr_wait, - 0, 0, __func__); + + /* No port mapper available, go with the specified info */ + memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr, + sizeof(ep->com.mapped_local_addr)); + + c4iw_form_reg_msg(dev, &pm_reg_msg); + iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW); + if (iwpm_err) { + PDBG("%s: Port Mapper reg pid fail (err = %d).\n", + __func__, iwpm_err); + } + if (iwpm_valid_pid() && !iwpm_err) { + memcpy(&pm_msg.loc_addr, &ep->com.local_addr, + sizeof(ep->com.local_addr)); + iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW); + if (iwpm_err) + PDBG("%s: Port Mapper query fail (err = %d).\n", + __func__, iwpm_err); + else + memcpy(&ep->com.mapped_local_addr, + &pm_msg.mapped_loc_addr, + sizeof(ep->com.mapped_local_addr)); + } + if (iwpm_create_mapinfo(&ep->com.local_addr, + &ep->com.mapped_local_addr, RDMA_NL_C4IW)) { + err = -ENOMEM; + goto fail3; } + print_addr(&ep->com, __func__, "add_mapping/create_mapinfo"); + + set_bit(RELEASE_MAPINFO, &ep->com.flags); + state_set(&ep->com, LISTEN); + if (ep->com.local_addr.ss_family == AF_INET) + err = create_server4(dev, ep); + else + err = create_server6(dev, ep); if (!err) { cm_id->provider_data = ep; goto out; } - pr_err("%s cxgb4_create_server/filter failed err %d " \ - "stid %d laddr %08x lport %d\n", \ - __func__, err, ep->stid, - ntohl(ep->com.local_addr.sin_addr.s_addr), - ntohs(ep->com.local_addr.sin_port)); - cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); + +fail3: + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); fail2: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); @@ -2658,20 +3211,24 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) might_sleep(); state_set(&ep->com, DEAD); - if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { + if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn && + ep->com.local_addr.ss_family == AF_INET) { err = cxgb4_remove_server_filter( ep->com.dev->rdev.lldi.ports[0], ep->stid, ep->com.dev->rdev.lldi.rxq_ids[0], 0); } else { c4iw_init_wr_wait(&ep->com.wr_wait); - err = listen_stop(ep); + err = cxgb4_remove_server( + ep->com.dev->rdev.lldi.ports[0], ep->stid, + ep->com.dev->rdev.lldi.rxq_ids[0], 0); if (err) goto done; err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0, __func__); } remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid); - cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); + cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, + ep->com.local_addr.ss_family); done: cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); @@ -2693,7 +3250,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; - close_complete_upcall(ep); + close_complete_upcall(ep, -EIO); ep->com.state = DEAD; } switch (ep->com.state) { @@ -2715,7 +3272,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { close = 1; if (abrupt) { - stop_ep_timer(ep); + (void)stop_ep_timer(ep); ep->com.state = ABORTING; } else ep->com.state = MORIBUND; @@ -2735,7 +3292,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep); + close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep, NULL, gfp); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -2756,7 +3313,8 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct c4iw_ep *ep; int atid = be32_to_cpu(req->tid); - ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); if (!ep) return; @@ -2800,7 +3358,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct cpl_pass_accept_req *cpl; int ret; - rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; BUG_ON(!rpl_skb); if (req->retval) { PDBG("%s passive open failure %d\n", __func__, req->retval); @@ -2811,7 +3369,8 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, } else { cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, - htonl(req->tid))); + (__force u32) htonl( + (__force u32) req->tid))); ret = pass_accept_req(dev, rpl_skb); if (!ret) kfree_skb(rpl_skb); @@ -2850,17 +3409,19 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb) static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) { u32 l2info; - u16 vlantag, len, hdr_len; + u16 vlantag, len, hdr_len, eth_hdr_len; u8 intf; struct cpl_rx_pkt *cpl = cplhdr(skb); struct cpl_pass_accept_req *req; struct tcp_options_received tmp_opt; + struct c4iw_dev *dev; + dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = cpl->vlan; - len = cpl->len; - l2info = cpl->l2info; - hdr_len = cpl->hdr_len; + vlantag = (__force u16) cpl->vlan; + len = (__force u16) cpl->len; + l2info = (__force u32) cpl->l2info; + hdr_len = (__force u16) cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -2871,19 +3432,26 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(V_SYN_INTF(intf) | - V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + V_SYN_MAC_IDX(G_RX_MACIDX( + (__force int) htonl(l2info))) | F_SYN_XACT_MATCH); - req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | - V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | - V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | - V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); - req->vlan = vlantag; - req->len = len; + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? + G_RX_ETHHDR_LEN((__force int) htonl(l2info)) : + G_RX_T5_ETHHDR_LEN((__force int) htonl(l2info)); + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( + (__force int) htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( + (__force int) htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN( + (__force int) htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(eth_hdr_len))); + req->vlan = (__force __be16) vlantag; + req->len = (__force __be16) len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | PASS_OPEN_TOS(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -2905,6 +3473,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, struct sk_buff *req_skb; struct fw_ofld_connection_wr *req; struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); @@ -2912,7 +3481,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); - req->le.filter = filter; + req->le.filter = (__force __be32) filter; req->le.lport = lport; req->le.pport = rport; req->le.u.ipv4.lip = laddr; @@ -2938,10 +3507,16 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); - req->cookie = cpu_to_be64((u64)skb); + req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); - cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } } /* @@ -2964,12 +3539,11 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_accept_req *req = (void *)(rss + 1); struct l2t_entry *e; struct dst_entry *dst; - struct rtable *rt; struct c4iw_ep *lep; u16 window; struct port_info *pi; struct net_device *pdev; - u16 rss_qid; + u16 rss_qid, eth_hdr_len; int step; u32 tx_chan; struct neighbour *neigh; @@ -2988,8 +3562,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) /* * Calculate the server tid from filter hit index from cpl_rx_pkt. */ - stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base - + dev->rdev.lldi.tids->nstids; + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val); lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); if (!lep) { @@ -2997,7 +3570,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) { + eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ? + G_RX_ETHHDR_LEN(htonl(cpl->l2info)) : + G_RX_T5_ETHHDR_LEN(htonl(cpl->l2info)); + if (eth_hdr_len == ETH_HLEN) { eh = (struct ethhdr *)(req + 1); iph = (struct iphdr *)(eh + 1); } else { @@ -3018,16 +3594,21 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); - if (!rt) { + dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, + iph->tos); + if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); goto reject; } - dst = &rt->dst; neigh = dst_neigh_lookup_skb(dst, skb); + if (!neigh) { + pr_err("%s - failed to allocate neigh!\n", + __func__); + goto free_dst; + } + if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, iph->daddr); e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, @@ -3036,11 +3617,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) tx_chan = cxgb4_port_chan(pdev); dev_put(pdev); } else { + pdev = get_real_dev(neigh->dev); e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, - neigh->dev, 0); - pi = (struct port_info *)netdev_priv(neigh->dev); - tx_chan = cxgb4_port_chan(neigh->dev); + pdev, 0); + pi = (struct port_info *)netdev_priv(pdev); + tx_chan = cxgb4_port_chan(pdev); } + neigh_release(neigh); if (!e) { pr_err("%s - failed to allocate l2t entry!\n", __func__); @@ -3049,10 +3632,12 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; - window = htons(tcph->window); + window = (__force u16) htons((__force u16)tcph->window); /* Calcuate filter portion for LE region. */ - filter = cpu_to_be32(select_ntuple(dev, dst, e)); + filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple( + dev->rdev.lldi.ports[0], + e)); /* * Synthesize the cpl_pass_accept_req. We have everything except the @@ -3119,15 +3704,26 @@ static void process_timeout(struct c4iw_ep *ep) &attrs, 1); } __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); + break; + case ABORTING: + case DEAD: + + /* + * These states are expected if the ep timed out at the same + * time as another thread was calling stop_ep_timer(). + * So we silently do nothing for these states. + */ + abort = 0; break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) abort_connection(ep, NULL, GFP_KERNEL); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); } @@ -3141,6 +3737,8 @@ static void process_timedout_eps(void) tmp = timeout_list.next; list_del(tmp); + tmp->next = NULL; + tmp->prev = NULL; spin_unlock_irq(&timeout_lock); ep = list_entry(tmp, struct c4iw_ep, entry); process_timeout(ep); @@ -3157,6 +3755,7 @@ static void process_work(struct work_struct *work) unsigned int opcode; int ret; + process_timedout_eps(); while ((skb = skb_dequeue(&rxq))) { rpl = cplhdr(skb); dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *))); @@ -3166,8 +3765,8 @@ static void process_work(struct work_struct *work) ret = work_handlers[opcode](dev, skb); if (!ret) kfree_skb(skb); + process_timedout_eps(); } - process_timedout_eps(); } static DECLARE_WORK(skb_work, process_work); @@ -3175,11 +3774,21 @@ static DECLARE_WORK(skb_work, process_work); static void ep_timeout(unsigned long arg) { struct c4iw_ep *ep = (struct c4iw_ep *)arg; + int kickit = 0; spin_lock(&timeout_lock); - list_add_tail(&ep->entry, &timeout_list); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + /* + * Only insert if it is not already on the list. + */ + if (!ep->entry.next) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } + } spin_unlock(&timeout_lock); - queue_work(workq, &skb_work); + if (kickit) + queue_work(workq, &skb_work); } /* @@ -3257,7 +3866,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv_abort(req->status)) { + if (is_neg_adv(req->status)) { PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, ep->hwtid); kfree_skb(skb); @@ -3268,8 +3877,14 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, if we are on MPAv2 and want to retry with MPAv1 + * then, don't wake up yet. */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + } else + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } @@ -3310,7 +3925,7 @@ int __init c4iw_cm_init(void) return 0; } -void __exit c4iw_cm_term(void) +void c4iw_cm_term(void) { WARN_ON(!list_empty(&timeout_list)); flush_workqueue(workq); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 0f1607c8325..c04292c950f 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -134,7 +134,8 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, V_FW_RI_RES_WR_IQANUS(0) | V_FW_RI_RES_WR_IQANUD(1) | F_FW_RI_RES_WR_IQANDST | - V_FW_RI_RES_WR_IQANDSTINDEX(*rdev->lldi.rxq_ids)); + V_FW_RI_RES_WR_IQANDSTINDEX( + rdev->lldi.ciq_ids[cq->vector])); res->u.cq.iqdroprss_to_iqesize = cpu_to_be16( F_FW_RI_RES_WR_IQDROPRSS | V_FW_RI_RES_WR_IQPCIECH(2) | @@ -225,43 +226,186 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq, t4_swcq_produce(cq); } -int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count) +static void advance_oldest_read(struct t4_wq *wq); + +int c4iw_flush_sq(struct c4iw_qp *qhp) { int flushed = 0; - struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count]; - int in_use = wq->sq.in_use - count; + struct t4_wq *wq = &qhp->wq; + struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); + struct t4_cq *cq = &chp->cq; + int idx; + struct t4_swsqe *swsqe; - BUG_ON(in_use < 0); - while (in_use--) { - swsqe->signaled = 0; + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + idx = wq->sq.flush_cidx; + BUG_ON(idx >= wq->sq.size); + while (idx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[idx]; + BUG_ON(swsqe->flushed); + swsqe->flushed = 1; insert_sq_cqe(wq, cq, swsqe); - swsqe++; - if (swsqe == (wq->sq.sw_sq + wq->sq.size)) - swsqe = wq->sq.sw_sq; + if (wq->sq.oldest_read == swsqe) { + BUG_ON(swsqe->opcode != FW_RI_READ_REQ); + advance_oldest_read(wq); + } flushed++; + if (++idx == wq->sq.size) + idx = 0; } + wq->sq.flush_cidx += flushed; + if (wq->sq.flush_cidx >= wq->sq.size) + wq->sq.flush_cidx -= wq->sq.size; return flushed; } +static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) +{ + struct t4_swsqe *swsqe; + int cidx; + + if (wq->sq.flush_cidx == -1) + wq->sq.flush_cidx = wq->sq.cidx; + cidx = wq->sq.flush_cidx; + BUG_ON(cidx > wq->sq.size); + + while (cidx != wq->sq.pidx) { + swsqe = &wq->sq.sw_sq[cidx]; + if (!swsqe->signaled) { + if (++cidx == wq->sq.size) + cidx = 0; + } else if (swsqe->complete) { + + BUG_ON(swsqe->flushed); + + /* + * Insert this completed cqe into the swcq. + */ + PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n", + __func__, cidx, cq->sw_pidx); + swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); + cq->sw_queue[cq->sw_pidx] = swsqe->cqe; + t4_swcq_produce(cq); + swsqe->flushed = 1; + if (++cidx == wq->sq.size) + cidx = 0; + wq->sq.flush_cidx = cidx; + } else + break; + } +} + +static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, + struct t4_cqe *read_cqe) +{ + read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; + read_cqe->len = htonl(wq->sq.oldest_read->read_len); + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | + V_CQE_SWCQE(SW_CQE(hw_cqe)) | + V_CQE_OPCODE(FW_RI_READ_REQ) | + V_CQE_TYPE(1)); + read_cqe->bits_type_ts = hw_cqe->bits_type_ts; +} + +static void advance_oldest_read(struct t4_wq *wq) +{ + + u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; + + if (rptr == wq->sq.size) + rptr = 0; + while (rptr != wq->sq.pidx) { + wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; + + if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) + return; + if (++rptr == wq->sq.size) + rptr = 0; + } + wq->sq.oldest_read = NULL; +} + /* * Move all CQEs from the HWCQ into the SWCQ. + * Deal with out-of-order and/or completions that complete + * prior unsignalled WRs. */ -void c4iw_flush_hw_cq(struct t4_cq *cq) +void c4iw_flush_hw_cq(struct c4iw_cq *chp) { - struct t4_cqe *cqe = NULL, *swcqe; + struct t4_cqe *hw_cqe, *swcqe, read_cqe; + struct c4iw_qp *qhp; + struct t4_swsqe *swsqe; int ret; - PDBG("%s cq %p cqid 0x%x\n", __func__, cq, cq->cqid); - ret = t4_next_hw_cqe(cq, &cqe); + PDBG("%s cqid 0x%x\n", __func__, chp->cq.cqid); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); + + /* + * This logic is similar to poll_cq(), but not quite the same + * unfortunately. Need to move pertinent HW CQEs to the SW CQ but + * also do any translation magic that poll_cq() normally does. + */ while (!ret) { - PDBG("%s flushing hwcq cidx 0x%x swcq pidx 0x%x\n", - __func__, cq->cidx, cq->sw_pidx); - swcqe = &cq->sw_queue[cq->sw_pidx]; - *swcqe = *cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - t4_swcq_produce(cq); - t4_hwcq_consume(cq); - ret = t4_next_hw_cqe(cq, &cqe); + qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); + + /* + * drop CQEs with no associated QP + */ + if (qhp == NULL) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) + goto next_cqe; + + if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { + + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. + */ + if (CQE_WRID_STAG(hw_cqe) == 1) + goto next_cqe; + + /* + * Eat completions for unsignaled read WRs. + */ + if (!qhp->wq.sq.oldest_read->signaled) { + advance_oldest_read(&qhp->wq); + goto next_cqe; + } + + /* + * Don't write to the HWCQ, create a new read req CQE + * in local memory and move it into the swcq. + */ + create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(&qhp->wq); + } + + /* if its a SQ completion, then do the magic to move all the + * unsignaled and now in-order completions into the swcq. + */ + if (SQ_TYPE(hw_cqe)) { + swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; + swsqe->cqe = *hw_cqe; + swsqe->complete = 1; + flush_completed_wrs(&qhp->wq, &chp->cq); + } else { + swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; + *swcqe = *hw_cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + t4_swcq_produce(&chp->cq); + } +next_cqe: + t4_hwcq_consume(&chp->cq); + ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); } } @@ -281,25 +425,6 @@ static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) return 1; } -void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count) -{ - struct t4_cqe *cqe; - u32 ptr; - - *count = 0; - ptr = cq->sw_cidx; - while (ptr != cq->sw_pidx) { - cqe = &cq->sw_queue[ptr]; - if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && - wq->sq.oldest_read)) && - (CQE_QPID(cqe) == wq->sq.qid)) - (*count)++; - if (++ptr == cq->size) - ptr = 0; - } - PDBG("%s cq %p count %d\n", __func__, cq, *count); -} - void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) { struct t4_cqe *cqe; @@ -319,70 +444,6 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) PDBG("%s cq %p count %d\n", __func__, cq, *count); } -static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) -{ - struct t4_swsqe *swsqe; - u16 ptr = wq->sq.cidx; - int count = wq->sq.in_use; - int unsignaled = 0; - - swsqe = &wq->sq.sw_sq[ptr]; - while (count--) - if (!swsqe->signaled) { - if (++ptr == wq->sq.size) - ptr = 0; - swsqe = &wq->sq.sw_sq[ptr]; - unsignaled++; - } else if (swsqe->complete) { - - /* - * Insert this completed cqe into the swcq. - */ - PDBG("%s moving cqe into swcq sq idx %u cq idx %u\n", - __func__, ptr, cq->sw_pidx); - swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); - cq->sw_queue[cq->sw_pidx] = swsqe->cqe; - t4_swcq_produce(cq); - swsqe->signaled = 0; - wq->sq.in_use -= unsignaled; - break; - } else - break; -} - -static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, - struct t4_cqe *read_cqe) -{ - read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; - read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len); - read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | - V_CQE_SWCQE(SW_CQE(hw_cqe)) | - V_CQE_OPCODE(FW_RI_READ_REQ) | - V_CQE_TYPE(1)); - read_cqe->bits_type_ts = hw_cqe->bits_type_ts; -} - -/* - * Return a ptr to the next read wr in the SWSQ or NULL. - */ -static void advance_oldest_read(struct t4_wq *wq) -{ - - u32 rptr = wq->sq.oldest_read - wq->sq.sw_sq + 1; - - if (rptr == wq->sq.size) - rptr = 0; - while (rptr != wq->sq.pidx) { - wq->sq.oldest_read = &wq->sq.sw_sq[rptr]; - - if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ) - return; - if (++rptr == wq->sq.size) - rptr = 0; - } - wq->sq.oldest_read = NULL; -} - /* * poll_cq * @@ -427,6 +488,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } /* + * skip hw cqe's if the wq is flushed. + */ + if (wq->flushed && !SW_CQE(hw_cqe)) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* + * skip TERMINATE cqes... + */ + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + + /* * Gotta tweak READ completions: * 1) the cqe doesn't contain the sq_wptr from the wr. * 2) opcode not reflected from the wr. @@ -435,12 +512,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ - if (!wq->sq.oldest_read) { + if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) t4_set_wq_in_error(wq); ret = -EAGAIN; @@ -448,6 +535,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } /* + * Eat completions for unsignaled read WRs. + */ + if (!wq->sq.oldest_read->signaled) { + advance_oldest_read(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. */ @@ -457,14 +553,8 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, } if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { - *cqe_flushed = t4_wq_in_error(wq); + *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); t4_set_wq_in_error(wq); - goto proc_cqe; - } - - if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { - ret = -EAGAIN; - goto skip_cqe; } /* @@ -523,7 +613,24 @@ proc_cqe: * completion. */ if (SQ_TYPE(hw_cqe)) { - wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe); + int idx = CQE_WRID_SQ_IDX(hw_cqe); + BUG_ON(idx >= wq->sq.size); + + /* + * Account for any unsignaled completions completed by + * this signaled completion. In this case, cidx points + * to the first unsignaled one, and idx points to the + * signaled one. So adjust in_use based on this delta. + * if this is not completing any unsigned wrs, then the + * delta will be 0. Handle wrapping also! + */ + if (idx < wq->sq.cidx) + wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; + else + wq->sq.in_use -= idx - wq->sq.cidx; + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); + + wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); @@ -532,6 +639,7 @@ proc_cqe: *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); t4_rq_consume(wq); + goto skip_cqe; } flush_wq: @@ -565,7 +673,7 @@ skip_cqe: static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) { struct c4iw_qp *qhp = NULL; - struct t4_cqe cqe = {0, 0}, *rd_cqe; + struct t4_cqe uninitialized_var(cqe), *rd_cqe; struct t4_wq *wq; u32 credit = 0; u8 cqe_flushed; @@ -763,6 +871,9 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, rhp = to_c4iw_dev(ibdev); + if (vector >= rhp->rdev.lldi.nciq) + return ERR_PTR(-EINVAL); + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); @@ -784,7 +895,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, T4_MAX_IQ_SIZE); /* * Make HW queue at least 64 entries so GTS updates aren't too @@ -808,6 +919,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, } chp->cq.size = hwentries; chp->cq.memsize = memsize; + chp->cq.vector = vector; ret = create_cq(&rhp->rdev, &chp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx); @@ -843,7 +955,8 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; spin_unlock(&ucontext->mmap_lock); - ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); if (ret) goto err5; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ba11c76c0b5..7db82b24302 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -41,10 +41,20 @@ #define DRV_VERSION "0.1" MODULE_AUTHOR("Steve Wise"); -MODULE_DESCRIPTION("Chelsio T4 RDMA Driver"); +MODULE_DESCRIPTION("Chelsio T4/T5 RDMA Driver"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(DRV_VERSION); +static int allow_db_fc_on_t5; +module_param(allow_db_fc_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_fc_on_t5, + "Allow DB Flow Control on T5 (default = 0)"); + +static int allow_db_coalescing_on_t5; +module_param(allow_db_coalescing_on_t5, int, 0644); +MODULE_PARM_DESC(allow_db_coalescing_on_t5, + "Allow DB Coalescing on T5 (default = 0)"); + struct uld_ctx { struct list_head entry; struct cxgb4_lld_info lldi; @@ -54,6 +64,10 @@ struct uld_ctx { static LIST_HEAD(uld_ctx_list); static DEFINE_MUTEX(dev_mutex); +#define DB_FC_RESUME_SIZE 64 +#define DB_FC_RESUME_DELAY 1 +#define DB_FC_DRAIN_THRESH 0 + static struct dentry *c4iw_debugfs_root; struct c4iw_debugfs_data { @@ -63,6 +77,16 @@ struct c4iw_debugfs_data { int pos; }; +/* registered cxgb4 netlink callbacks */ +static struct ibnl_client_cbs c4iw_nl_cb_table[] = { + [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, + [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, + [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, + [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, + [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, + [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} +}; + static int count_idrs(int id, void *p, void *data) { int *countp = data; @@ -93,18 +117,57 @@ static int dump_qp(int id, void *p, void *data) if (space == 0) return 1; - if (qp->ep) - cc = snprintf(qpd->buf + qpd->pos, space, - "qp sq id %u rq id %u state %u onchip %u " - "ep tid %u state %u %pI4:%u->%pI4:%u\n", - qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state, - qp->wq.sq.flags & T4_SQ_ONCHIP, - qp->ep->hwtid, (int)qp->ep->com.state, - &qp->ep->com.local_addr.sin_addr.s_addr, - ntohs(qp->ep->com.local_addr.sin_port), - &qp->ep->com.remote_addr.sin_addr.s_addr, - ntohs(qp->ep->com.remote_addr.sin_port)); - else + if (qp->ep) { + if (qp->ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in *) + &qp->ep->com.local_addr; + struct sockaddr_in *rsin = (struct sockaddr_in *) + &qp->ep->com.remote_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &qp->ep->com.mapped_local_addr; + struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) + &qp->ep->com.mapped_remote_addr; + + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u rq id %u state %u " + "onchip %u ep tid %u state %u " + "%pI4:%u/%u->%pI4:%u/%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + qp->ep->hwtid, (int)qp->ep->com.state, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(mapped_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(mapped_rsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) + &qp->ep->com.local_addr; + struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) + &qp->ep->com.remote_addr; + struct sockaddr_in6 *mapped_lsin6 = + (struct sockaddr_in6 *) + &qp->ep->com.mapped_local_addr; + struct sockaddr_in6 *mapped_rsin6 = + (struct sockaddr_in6 *) + &qp->ep->com.mapped_remote_addr; + + cc = snprintf(qpd->buf + qpd->pos, space, + "rc qp sq id %u rq id %u state %u " + "onchip %u ep tid %u state %u " + "%pI6:%u/%u->%pI6:%u/%u\n", + qp->wq.sq.qid, qp->wq.rq.qid, + (int)qp->attr.state, + qp->wq.sq.flags & T4_SQ_ONCHIP, + qp->ep->hwtid, (int)qp->ep->com.state, + &lsin6->sin6_addr, + ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port), + &rsin6->sin6_addr, + ntohs(rsin6->sin6_port), + ntohs(mapped_rsin6->sin6_port)); + } + } else cc = snprintf(qpd->buf + qpd->pos, space, "qp sq id %u rq id %u state %u onchip %u\n", qp->wq.sq.qid, qp->wq.rq.qid, @@ -247,7 +310,7 @@ static const struct file_operations stag_debugfs_fops = { .llseek = default_llseek, }; -static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"}; +static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"}; static int stats_show(struct seq_file *seq, void *v) { @@ -276,9 +339,10 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full); seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty); seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop); - seq_printf(seq, " DB State: %s Transitions %llu\n", + seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n", db_state_str[dev->db_state], - dev->rdev.stats.db_state_transitions); + dev->rdev.stats.db_state_transitions, + dev->rdev.stats.db_fc_interruptions); seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full); seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n", dev->rdev.stats.act_ofld_conn_fails); @@ -341,15 +405,49 @@ static int dump_ep(int id, void *p, void *data) if (space == 0) return 1; - cc = snprintf(epd->buf + epd->pos, space, - "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx " - "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n", - ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state, - ep->com.flags, ep->com.history, ep->hwtid, ep->atid, - &ep->com.local_addr.sin_addr.s_addr, - ntohs(ep->com.local_addr.sin_port), - &ep->com.remote_addr.sin_addr.s_addr, - ntohs(ep->com.remote_addr.sin_port)); + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in *) + &ep->com.local_addr; + struct sockaddr_in *rsin = (struct sockaddr_in *) + &ep->com.remote_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + struct sockaddr_in *mapped_rsin = (struct sockaddr_in *) + &ep->com.mapped_remote_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx " + "history 0x%lx hwtid %d atid %d " + "%pI4:%d/%d <-> %pI4:%d/%d\n", + ep, ep->com.cm_id, ep->com.qp, + (int)ep->com.state, ep->com.flags, + ep->com.history, ep->hwtid, ep->atid, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(mapped_lsin->sin_port), + &rsin->sin_addr, ntohs(rsin->sin_port), + ntohs(mapped_rsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) + &ep->com.local_addr; + struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *) + &ep->com.remote_addr; + struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_remote_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p qp %p state %d flags 0x%lx " + "history 0x%lx hwtid %d atid %d " + "%pI6:%d/%d <-> %pI6:%d/%d\n", + ep, ep->com.cm_id, ep->com.qp, + (int)ep->com.state, ep->com.flags, + ep->com.history, ep->hwtid, ep->atid, + &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port), + &rsin6->sin6_addr, ntohs(rsin6->sin6_port), + ntohs(mapped_rsin6->sin6_port)); + } if (cc < space) epd->pos += cc; return 0; @@ -366,12 +464,33 @@ static int dump_listen_ep(int id, void *p, void *data) if (space == 0) return 1; - cc = snprintf(epd->buf + epd->pos, space, - "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d " - "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state, - ep->com.flags, ep->stid, ep->backlog, - &ep->com.local_addr.sin_addr.s_addr, - ntohs(ep->com.local_addr.sin_port)); + if (ep->com.local_addr.ss_family == AF_INET) { + struct sockaddr_in *lsin = (struct sockaddr_in *) + &ep->com.local_addr; + struct sockaddr_in *mapped_lsin = (struct sockaddr_in *) + &ep->com.mapped_local_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d " + "backlog %d %pI4:%d/%d\n", + ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &lsin->sin_addr, ntohs(lsin->sin_port), + ntohs(mapped_lsin->sin_port)); + } else { + struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *) + &ep->com.local_addr; + struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *) + &ep->com.mapped_local_addr; + + cc = snprintf(epd->buf + epd->pos, space, + "ep %p cm_id %p state %d flags 0x%lx stid %d " + "backlog %d %pI6:%d/%d\n", + ep, ep->com.cm_id, (int)ep->com.state, + ep->com.flags, ep->stid, ep->backlog, + &lsin6->sin6_addr, ntohs(lsin6->sin6_port), + ntohs(mapped_lsin6->sin6_port)); + } if (cc < space) epd->pos += cc; return 0; @@ -530,10 +649,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, rdev->lldi.vr->cq.size); - PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " + PDBG("udb len 0x%x udb base %llx db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (void *)pci_resource_start(rdev->lldi.pdev, 2), + (u64)pci_resource_start(rdev->lldi.pdev, 2), rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpshift, rdev->qpmask, @@ -571,6 +690,13 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err); goto err4; } + rdev->status_page = (struct t4_dev_status_page *) + __get_free_page(GFP_KERNEL); + if (!rdev->status_page) { + pr_err(MOD "error allocating status page\n"); + goto err4; + } + rdev->status_page->db_off = 0; return 0; err4: c4iw_rqtpool_destroy(rdev); @@ -584,6 +710,7 @@ err1: static void c4iw_rdev_close(struct c4iw_rdev *rdev) { + free_page((unsigned long)rdev->status_page); c4iw_pblpool_destroy(rdev); c4iw_rqtpool_destroy(rdev); c4iw_destroy_resource(&rdev->resource); @@ -598,7 +725,10 @@ static void c4iw_dealloc(struct uld_ctx *ctx) idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); - iounmap(ctx->dev->rdev.oc_mw_kva); + if (ctx->dev->rdev.bar2_kva) + iounmap(ctx->dev->rdev.bar2_kva); + if (ctx->dev->rdev.oc_mw_kva) + iounmap(ctx->dev->rdev.oc_mw_kva); ib_dealloc_device(&ctx->dev->ibdev); ctx->dev = NULL; } @@ -614,7 +744,7 @@ static int rdma_supported(const struct cxgb4_lld_info *infop) { return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 && infop->vr->rq.size > 0 && infop->vr->qp.size > 0 && - infop->vr->cq.size > 0 && infop->vr->ocq.size > 0; + infop->vr->cq.size > 0; } static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) @@ -627,6 +757,10 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) pci_name(infop->pdev)); return ERR_PTR(-ENOSYS); } + if (!ocqp_supported(infop)) + pr_info("%s: On-Chip Queues not supported on this device.\n", + pci_name(infop->pdev)); + devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); if (!devp) { printk(KERN_ERR MOD "Cannot allocate ib device\n"); @@ -634,11 +768,33 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) } devp->rdev.lldi = *infop; - devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) + - (pci_resource_len(devp->rdev.lldi.pdev, 2) - - roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size)); - devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, - devp->rdev.lldi.vr->ocq.size); + /* + * For T5 devices, we map all of BAR2 with WC. + * For T4 devices with onchip qp mem, we map only that part + * of BAR2 with WC. + */ + devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2); + if (is_t5(devp->rdev.lldi.adapter_type)) { + devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa, + pci_resource_len(devp->rdev.lldi.pdev, 2)); + if (!devp->rdev.bar2_kva) { + pr_err(MOD "Unable to ioremap BAR2\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } else if (ocqp_supported(infop)) { + devp->rdev.oc_mw_pa = + pci_resource_start(devp->rdev.lldi.pdev, 2) + + pci_resource_len(devp->rdev.lldi.pdev, 2) - + roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size); + devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa, + devp->rdev.lldi.vr->ocq.size); + if (!devp->rdev.oc_mw_kva) { + pr_err(MOD "Unable to ioremap onchip mem\n"); + ib_dealloc_device(&devp->ibdev); + return ERR_PTR(-EINVAL); + } + } PDBG(KERN_INFO MOD "ocq memory: " "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n", @@ -661,6 +817,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) spin_lock_init(&devp->lock); mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); + INIT_LIST_HEAD(&devp->db_fc_list); if (c4iw_debugfs_root) { devp->debugfs_root = debugfs_create_dir( @@ -668,6 +825,8 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) c4iw_debugfs_root); setup_debugfs(devp); } + + return devp; } @@ -678,8 +837,8 @@ static void *c4iw_uld_add(const struct cxgb4_lld_info *infop) int i; if (!vers_printed++) - printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n", - DRV_VERSION); + pr_info("Chelsio T4/T5 RDMA Driver - version %s\n", + DRV_VERSION); ctx = kzalloc(sizeof *ctx, GFP_KERNEL); if (!ctx) { @@ -797,7 +956,8 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, "RSS %#llx, FL %#llx, len %u\n", pci_name(ctx->lldi.pdev), gl->va, (unsigned long long)be64_to_cpu(*rsp), - (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + (unsigned long long)be64_to_cpu( + *(__force __be64 *)gl->va), gl->tot_len); return 0; @@ -808,11 +968,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, } opcode = *(u8 *)rsp; - if (c4iw_handlers[opcode]) + if (c4iw_handlers[opcode]) { c4iw_handlers[opcode](dev, skb); - else + } else { pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } return 0; nomem: @@ -888,13 +1050,16 @@ static int disable_qp_db(int id, void *p, void *data) static void stop_queues(struct uld_ctx *ctx) { - spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->db_state == NORMAL) { - ctx->dev->rdev.stats.db_state_transitions++; - ctx->dev->db_state = FLOW_CONTROL; + unsigned long flags; + + spin_lock_irqsave(&ctx->dev->lock, flags); + ctx->dev->rdev.stats.db_state_transitions++; + ctx->dev->db_state = STOPPED; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - } - spin_unlock_irq(&ctx->dev->lock); + else + ctx->dev->rdev.status_page->db_off = 1; + spin_unlock_irqrestore(&ctx->dev->lock, flags); } static int enable_qp_db(int id, void *p, void *data) @@ -905,15 +1070,72 @@ static int enable_qp_db(int id, void *p, void *data) return 0; } +static void resume_rc_qp(struct c4iw_qp *qp) +{ + spin_lock(&qp->lock); + t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.sq.wq_pidx_inc = 0; + t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, + is_t5(qp->rhp->rdev.lldi.adapter_type), NULL); + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); +} + +static void resume_a_chunk(struct uld_ctx *ctx) +{ + int i; + struct c4iw_qp *qp; + + for (i = 0; i < DB_FC_RESUME_SIZE; i++) { + qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp, + db_fc_entry); + list_del_init(&qp->db_fc_entry); + resume_rc_qp(qp); + if (list_empty(&ctx->dev->db_fc_list)) + break; + } +} + static void resume_queues(struct uld_ctx *ctx) { spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt <= db_fc_threshold && - ctx->dev->db_state == FLOW_CONTROL) { - ctx->dev->db_state = NORMAL; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); + if (ctx->dev->db_state != STOPPED) + goto out; + ctx->dev->db_state = FLOW_CONTROL; + while (1) { + if (list_empty(&ctx->dev->db_fc_list)) { + WARN_ON(ctx->dev->db_state != FLOW_CONTROL); + ctx->dev->db_state = NORMAL; + ctx->dev->rdev.stats.db_state_transitions++; + if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) { + idr_for_each(&ctx->dev->qpidr, enable_qp_db, + NULL); + } else { + ctx->dev->rdev.status_page->db_off = 0; + } + break; + } else { + if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) + < (ctx->dev->rdev.lldi.dbfifo_int_thresh << + DB_FC_DRAIN_THRESH)) { + resume_a_chunk(ctx); + } + if (!list_empty(&ctx->dev->db_fc_list)) { + spin_unlock_irq(&ctx->dev->lock); + if (DB_FC_RESUME_DELAY) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(DB_FC_RESUME_DELAY); + } + spin_lock_irq(&ctx->dev->lock); + if (ctx->dev->db_state != FLOW_CONTROL) + break; + } + } } +out: + if (ctx->dev->db_state != NORMAL) + ctx->dev->rdev.stats.db_fc_interruptions++; spin_unlock_irq(&ctx->dev->lock); } @@ -939,12 +1161,12 @@ static int count_qps(int id, void *p, void *data) return 0; } -static void deref_qps(struct qp_list qp_list) +static void deref_qps(struct qp_list *qp_list) { int idx; - for (idx = 0; idx < qp_list.idx; idx++) - c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp); + for (idx = 0; idx < qp_list->idx; idx++) + c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp); } static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) @@ -955,17 +1177,22 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) for (idx = 0; idx < qp_list->idx; idx++) { struct c4iw_qp *qp = qp_list->qps[idx]; + spin_lock_irq(&qp->rhp->lock); + spin_lock(&qp->lock); ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.sq.qid, t4_sq_host_wq_pidx(&qp->wq), t4_sq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing SQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.sq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.sq.wq_pidx_inc = 0; ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0], qp->wq.rq.qid, @@ -973,12 +1200,17 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list) t4_rq_wq_size(&qp->wq)); if (ret) { - printk(KERN_ERR MOD "%s: Fatal error - " + pr_err(KERN_ERR MOD "%s: Fatal error - " "DB overflow recovery failed - " "error syncing RQ qid %u\n", pci_name(ctx->lldi.pdev), qp->wq.rq.qid); + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); return; } + qp->wq.rq.wq_pidx_inc = 0; + spin_unlock(&qp->lock); + spin_unlock_irq(&qp->rhp->lock); /* Wait for the dbfifo to drain */ while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) { @@ -994,36 +1226,22 @@ static void recover_queues(struct uld_ctx *ctx) struct qp_list qp_list; int ret; - /* lock out kernel db ringers */ - mutex_lock(&ctx->dev->db_mutex); - - /* put all queues in to recovery mode */ - spin_lock_irq(&ctx->dev->lock); - ctx->dev->db_state = RECOVERY; - ctx->dev->rdev.stats.db_state_transitions++; - idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL); - spin_unlock_irq(&ctx->dev->lock); - /* slow everybody down */ set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(usecs_to_jiffies(1000)); - /* Wait for the dbfifo to completely drain. */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - /* flush the SGE contexts */ ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]); if (ret) { printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); - goto out; + return; } /* Count active queues so we can build a list of queues to recover */ spin_lock_irq(&ctx->dev->lock); + WARN_ON(ctx->dev->db_state != STOPPED); + ctx->dev->db_state = RECOVERY; idr_for_each(&ctx->dev->qpidr, count_qps, &count); qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC); @@ -1031,7 +1249,7 @@ static void recover_queues(struct uld_ctx *ctx) printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n", pci_name(ctx->lldi.pdev)); spin_unlock_irq(&ctx->dev->lock); - goto out; + return; } qp_list.idx = 0; @@ -1044,29 +1262,13 @@ static void recover_queues(struct uld_ctx *ctx) recover_lost_dbs(ctx, &qp_list); /* we're almost done! deref the qps and clean up */ - deref_qps(qp_list); + deref_qps(&qp_list); kfree(qp_list.qps); - /* Wait for the dbfifo to completely drain again */ - while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) { - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(10)); - } - - /* resume the queues */ spin_lock_irq(&ctx->dev->lock); - if (ctx->dev->qpcnt > db_fc_threshold) - ctx->dev->db_state = FLOW_CONTROL; - else { - ctx->dev->db_state = NORMAL; - idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL); - } - ctx->dev->rdev.stats.db_state_transitions++; + WARN_ON(ctx->dev->db_state != RECOVERY); + ctx->dev->db_state = STOPPED; spin_unlock_irq(&ctx->dev->lock); - -out: - /* start up kernel db ringers again */ - mutex_unlock(&ctx->dev->db_mutex); } static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) @@ -1076,9 +1278,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) switch (control) { case CXGB4_CONTROL_DB_FULL: stop_queues(ctx); - mutex_lock(&ctx->dev->rdev.stats.lock); ctx->dev->rdev.stats.db_full++; - mutex_unlock(&ctx->dev->rdev.stats.lock); break; case CXGB4_CONTROL_DB_EMPTY: resume_queues(ctx); @@ -1121,6 +1321,20 @@ static int __init c4iw_init_module(void) printk(KERN_WARNING MOD "could not create debugfs entry, continuing\n"); + if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS, + c4iw_nl_cb_table)) + pr_err("%s[%u]: Failed to add netlink callback\n" + , __func__, __LINE__); + + err = iwpm_init(RDMA_NL_C4IW); + if (err) { + pr_err("port mapper initialization failed with %d\n", err); + ibnl_remove_client(RDMA_NL_C4IW); + c4iw_cm_term(); + debugfs_remove_recursive(c4iw_debugfs_root); + return err; + } + cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info); return 0; @@ -1138,6 +1352,8 @@ static void __exit c4iw_exit_module(void) } mutex_unlock(&dev_mutex); cxgb4_unregister_uld(CXGB4_ULD_RDMA); + iwpm_exit(RDMA_NL_C4IW); + ibnl_remove_client(RDMA_NL_C4IW); c4iw_cm_term(); debugfs_remove_recursive(c4iw_debugfs_root); } diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index cf2f6b47617..d61d0a18f78 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -44,14 +44,6 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, struct c4iw_qp_attributes attrs; unsigned long flag; - if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || - (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { - PDBG("%s AE received after RTS - " - "qp state %d qpid 0x%x status 0x%x\n", __func__, - qhp->attr.state, qhp->wq.sq.qid, CQE_STATUS(err_cqe)); - return; - } - printk(KERN_ERR MOD "AE qpid 0x%x opcode %d status 0x%x " "type %d wrid.hi 0x%x wrid.lo 0x%x\n", CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c index f95e5df30db..0161ae6ad62 100644 --- a/drivers/infiniband/hw/cxgb4/id_table.c +++ b/drivers/infiniband/hw/cxgb4/id_table.c @@ -54,7 +54,7 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc) if (obj < alloc->max) { if (alloc->flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last += random32() % RANDOM_SKIP; + alloc->last += prandom_u32() % RANDOM_SKIP; else alloc->last = obj + 1; if (alloc->last >= alloc->max) @@ -88,7 +88,7 @@ int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, alloc->start = start; alloc->flags = flags; if (flags & C4IW_ID_TABLE_F_RANDOM) - alloc->last = random32() % RANDOM_SKIP; + alloc->last = prandom_u32() % RANDOM_SKIP; else alloc->last = 0; alloc->max = num; diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9c1644fb025..361fff7a074 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -52,6 +52,8 @@ #include <rdma/ib_verbs.h> #include <rdma/iw_cm.h> +#include <rdma/rdma_netlink.h> +#include <rdma/iw_portmap.h> #include "cxgb4.h" #include "cxgb4_uld.h" @@ -109,6 +111,7 @@ struct c4iw_dev_ucontext { enum c4iw_rdev_flags { T4_FATAL_ERROR = (1<<0), + T4_STATUS_PAGE_DISABLED = (1<<1), }; struct c4iw_stat { @@ -130,6 +133,7 @@ struct c4iw_stats { u64 db_empty; u64 db_drop; u64 db_state_transitions; + u64 db_fc_interruptions; u64 tcam_full; u64 act_ofld_conn_fails; u64 pas_ofld_conn_fails; @@ -147,9 +151,12 @@ struct c4iw_rdev { struct gen_pool *ocqp_pool; u32 flags; struct cxgb4_lld_info lldi; + unsigned long bar2_pa; + void __iomem *bar2_kva; unsigned long oc_mw_pa; void __iomem *oc_mw_kva; struct c4iw_stats stats; + struct t4_dev_status_page *status_page; }; static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) @@ -162,7 +169,7 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) return min((int)T4_MAX_NUM_STAG, (int)(rdev->lldi.vr->stag.size >> 5)); } -#define C4IW_WR_TO (10*HZ) +#define C4IW_WR_TO (30*HZ) struct c4iw_wr_wait { struct completion completion; @@ -211,7 +218,8 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, enum db_state { NORMAL = 0, FLOW_CONTROL = 1, - RECOVERY = 2 + RECOVERY = 2, + STOPPED = 3 }; struct c4iw_dev { @@ -225,10 +233,10 @@ struct c4iw_dev { struct mutex db_mutex; struct dentry *debugfs_root; enum db_state db_state; - int qpcnt; struct idr hwtid_idr; struct idr atid_idr; struct idr stid_idr; + struct list_head db_fc_list; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -260,20 +268,21 @@ static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, void *handle, u32 id, int lock) { int ret; - int newid; - do { - if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) - return -ENOMEM; - if (lock) - spin_lock_irq(&rhp->lock); - ret = idr_get_new_above(idr, handle, id, &newid); - BUG_ON(!ret && newid != id); - if (lock) - spin_unlock_irq(&rhp->lock); - } while (ret == -EAGAIN); + if (lock) { + idr_preload(GFP_KERNEL); + spin_lock_irq(&rhp->lock); + } + + ret = idr_alloc(idr, handle, id, id + 1, GFP_ATOMIC); - return ret; + if (lock) { + spin_unlock_irq(&rhp->lock); + idr_preload_end(); + } + + BUG_ON(ret == -ENOSPC); + return ret < 0 ? ret : 0; } static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr, @@ -368,7 +377,7 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; - int size; + int pll_len; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( @@ -428,10 +437,12 @@ struct c4iw_qp_attributes { u8 ecode; u16 sq_db_inc; u16 rq_db_inc; + u8 send_term; }; struct c4iw_qp { struct ib_qp ibqp; + struct list_head db_fc_entry; struct c4iw_dev *rhp; struct c4iw_ep *ep; struct c4iw_qp_attributes attr; @@ -441,6 +452,7 @@ struct c4iw_qp { atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; + int sq_sig_all; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) @@ -716,6 +728,9 @@ enum c4iw_ep_flags { ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, + TIMEOUT = 4, + QP_REFERENCED = 5, + RELEASE_MAPINFO = 6, }; enum c4iw_ep_history { @@ -750,8 +765,10 @@ struct c4iw_ep_common { enum c4iw_ep_state state; struct kref kref; struct mutex mutex; - struct sockaddr_in local_addr; - struct sockaddr_in remote_addr; + struct sockaddr_storage local_addr; + struct sockaddr_storage remote_addr; + struct sockaddr_storage mapped_local_addr; + struct sockaddr_storage mapped_remote_addr; struct c4iw_wr_wait wr_wait; unsigned long flags; unsigned long history; @@ -793,7 +810,48 @@ struct c4iw_ep { u8 retry_with_mpa_v1; u8 tried_with_mpa_v1; unsigned int retry_count; -}; + int snd_win; + int rcv_win; +}; + +static inline void print_addr(struct c4iw_ep_common *epc, const char *func, + const char *msg) +{ + +#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr)) +#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port) +#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr)) +#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port) + + if (c4iw_debug) { + switch (epc->local_addr.ss_family) { + case AF_INET: + PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n", + func, msg, SINA(&epc->local_addr), + SINP(&epc->local_addr), + SINP(&epc->mapped_local_addr), + SINA(&epc->remote_addr), + SINP(&epc->remote_addr), + SINP(&epc->mapped_remote_addr)); + break; + case AF_INET6: + PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n", + func, msg, SIN6A(&epc->local_addr), + SIN6P(&epc->local_addr), + SIN6P(&epc->mapped_local_addr), + SIN6A(&epc->remote_addr), + SIN6P(&epc->remote_addr), + SIN6P(&epc->mapped_remote_addr)); + break; + default: + break; + } + } +#undef SINA +#undef SINP +#undef SIN6A +#undef SIN6P +} static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) { @@ -814,6 +872,15 @@ static inline int compute_wscale(int win) return wscale; } +static inline int ocqp_supported(const struct cxgb4_lld_info *infop) +{ +#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) + return infop->vr->ocq.size > 0; +#else + return 0; +#endif +} + u32 c4iw_id_alloc(struct c4iw_id_table *alloc); void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj); int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num, @@ -841,7 +908,7 @@ int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_register_device(struct c4iw_dev *dev); void c4iw_unregister_device(struct c4iw_dev *dev); int __init c4iw_cm_init(void); -void __exit c4iw_cm_term(void); +void c4iw_cm_term(void); void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, @@ -866,7 +933,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl( int page_list_len); struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth); int c4iw_dealloc_mw(struct ib_mw *mw); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd); +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); @@ -906,12 +973,11 @@ void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size); int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb); -void c4iw_flush_hw_cq(struct t4_cq *cq); +void c4iw_flush_hw_cq(struct c4iw_cq *chp); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); -void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); -int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count); +int c4iw_flush_sq(struct c4iw_qp *qhp); int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid); u16 c4iw_rqes_posted(struct c4iw_qp *qhp); int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe); @@ -927,6 +993,8 @@ extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; extern int c4iw_max_read_depth; extern int db_fc_threshold; +extern int db_coalescing_threshold; +extern int use_dsgl; #endif diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index afd81790ab3..ec7a2988a70 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -30,16 +30,76 @@ * SOFTWARE. */ +#include <linux/module.h> +#include <linux/moduleparam.h> #include <rdma/ib_umem.h> #include <linux/atomic.h> #include "iw_cxgb4.h" +int use_dsgl = 0; +module_param(use_dsgl, int, 0644); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); + #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 +#define T4_ULPTX_MAX_DMA 1024 +#define C4IW_INLINE_THRESHOLD 128 -static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, - void *data) +static int inline_threshold = C4IW_INLINE_THRESHOLD; +module_param(inline_threshold, int, 0644); +MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)"); + +static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr, + u32 len, dma_addr_t data, int wait) +{ + struct sk_buff *skb; + struct ulp_mem_io *req; + struct ulptx_sgl *sgl; + u8 wr_len; + int ret = 0; + struct c4iw_wr_wait wr_wait; + + addr &= 0x7FFFFFF; + + if (wait) + c4iw_init_wr_wait(&wr_wait); + wr_len = roundup(sizeof(*req) + sizeof(*sgl), 16); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + return -ENOMEM; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + req = (struct ulp_mem_io *)__skb_put(skb, wr_len); + memset(req, 0, wr_len); + INIT_ULPTX_WR(req, wr_len, 0, 0); + req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) | + (wait ? FW_WR_COMPL(1) : 0)); + req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L; + req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); + req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1)); + req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN(len>>5)); + req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), 16)); + req->lock_addr = cpu_to_be32(ULP_MEMIO_ADDR(addr)); + + sgl = (struct ulptx_sgl *)(req + 1); + sgl->cmd_nsge = cpu_to_be32(ULPTX_CMD(ULP_TX_SC_DSGL) | + ULPTX_NSGE(1)); + sgl->len0 = cpu_to_be32(len); + sgl->addr0 = cpu_to_be64(data); + + ret = c4iw_ofld_send(rdev, skb); + if (ret) + return ret; + if (wait) + ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__); + return ret; +} + +static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) { struct sk_buff *skb; struct ulp_mem_io *req; @@ -47,6 +107,12 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, u8 wr_len, *to_dp, *from_dp; int copy_len, num_wqe, i, ret = 0; struct c4iw_wr_wait wr_wait; + __be32 cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE)); + + if (is_t4(rdev->lldi.adapter_type)) + cmd |= cpu_to_be32(ULP_MEMIO_ORDER(1)); + else + cmd |= cpu_to_be32(V_T5_ULP_MEMIO_IMM(1)); addr &= 0x7FFFFFF; PDBG("%s addr 0x%x len %u\n", __func__, addr, len); @@ -77,7 +143,7 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, req->wr.wr_mid = cpu_to_be32( FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16))); - req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE) | (1<<23)); + req->cmd = cmd; req->dlen = cpu_to_be32(ULP_MEMIO_DATA_LEN( DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO))); req->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(req->wr), @@ -107,6 +173,67 @@ static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, return ret; } +static int _c4iw_write_mem_dma(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data) +{ + u32 remain = len; + u32 dmalen; + int ret = 0; + dma_addr_t daddr; + dma_addr_t save; + + daddr = dma_map_single(&rdev->lldi.pdev->dev, data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&rdev->lldi.pdev->dev, daddr)) + return -1; + save = daddr; + + while (remain > inline_threshold) { + if (remain < T4_ULPTX_MAX_DMA) { + if (remain & ~T4_ULPTX_MIN_IO) + dmalen = remain & ~(T4_ULPTX_MIN_IO-1); + else + dmalen = remain; + } else + dmalen = T4_ULPTX_MAX_DMA; + remain -= dmalen; + ret = _c4iw_write_mem_dma_aligned(rdev, addr, dmalen, daddr, + !remain); + if (ret) + goto out; + addr += dmalen >> 5; + data += dmalen; + daddr += dmalen; + } + if (remain) + ret = _c4iw_write_mem_inline(rdev, addr, remain, data); +out: + dma_unmap_single(&rdev->lldi.pdev->dev, save, len, DMA_TO_DEVICE); + return ret; +} + +/* + * write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + */ +static int write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, + void *data) +{ + if (is_t5(rdev->lldi.adapter_type) && use_dsgl) { + if (len > inline_threshold) { + if (_c4iw_write_mem_dma(rdev, addr, len, data)) { + printk_ratelimited(KERN_WARNING + "%s: dma map" + " failure (non fatal)\n", + pci_name(rdev->lldi.pdev)); + return _c4iw_write_mem_inline(rdev, addr, len, + data); + } else + return 0; + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); + } else + return _c4iw_write_mem_inline(rdev, addr, len, data); +} + /* * Build and write a TPT entry. * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size, @@ -132,8 +259,12 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); - if (!stag_idx) + if (!stag_idx) { + mutex_lock(&rdev->stats.lock); + rdev->stats.stag.fail++; + mutex_unlock(&rdev->stats.lock); return -ENOMEM; + } mutex_lock(&rdev->stats.lock); rdev->stats.stag.cur += 32; if (rdev->stats.stag.cur > rdev->stats.stag.max) @@ -551,9 +682,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -583,10 +714,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -599,24 +727,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + - mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, @@ -650,7 +776,7 @@ err: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) +struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -659,6 +785,9 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd) u32 stag = 0; int ret; + if (type != IB_MW_TYPE_1) + return ERR_PTR(-EINVAL); + php = to_c4iw_pd(pd); rhp = php->rhp; mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); @@ -757,19 +886,27 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, struct c4iw_fr_page_list *c4pl; struct c4iw_dev *dev = to_c4iw_dev(device); dma_addr_t dma_addr; - int size = sizeof *c4pl + page_list_len * sizeof(u64); + int pll_len = roundup(page_list_len * sizeof(u64), 32); - c4pl = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, size, - &dma_addr, GFP_KERNEL); + c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL); if (!c4pl) return ERR_PTR(-ENOMEM); + c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev, + pll_len, &dma_addr, + GFP_KERNEL); + if (!c4pl->ibpl.page_list) { + kfree(c4pl); + return ERR_PTR(-ENOMEM); + } dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->size = size; - c4pl->ibpl.page_list = (u64 *)(c4pl + 1); - c4pl->ibpl.max_page_list_len = page_list_len; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); return &c4pl->ibpl; } @@ -778,8 +915,14 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); - dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, c4pl->size, - c4pl, dma_unmap_addr(c4pl, mapping)); + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, + c4pl->pll_len, + c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); + kfree(c4pl); } int c4iw_dereg_mr(struct ib_mr *ib_mr) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index e084fdc6da7..b1d305338de 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -106,15 +106,57 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev, { struct c4iw_ucontext *context; struct c4iw_dev *rhp = to_c4iw_dev(ibdev); + static int warned; + struct c4iw_alloc_ucontext_resp uresp; + int ret = 0; + struct c4iw_mm_entry *mm = NULL; PDBG("%s ibdev %p\n", __func__, ibdev); context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return ERR_PTR(-ENOMEM); + if (!context) { + ret = -ENOMEM; + goto err; + } + c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx); INIT_LIST_HEAD(&context->mmaps); spin_lock_init(&context->mmap_lock); + + if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) { + if (!warned++) + pr_err(MOD "Warning - downlevel libcxgb4 (non-fatal), device status page disabled."); + rhp->rdev.flags |= T4_STATUS_PAGE_DISABLED; + } else { + mm = kmalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) { + ret = -ENOMEM; + goto err_free; + } + + uresp.status_page_size = PAGE_SIZE; + + spin_lock(&context->mmap_lock); + uresp.status_page_key = context->key; + context->key += PAGE_SIZE; + spin_unlock(&context->mmap_lock); + + ret = ib_copy_to_udata(udata, &uresp, + sizeof(uresp) - sizeof(uresp.reserved)); + if (ret) + goto err_mm; + + mm->key = uresp.status_page_key; + mm->addr = virt_to_phys(rhp->rdev.status_page); + mm->len = PAGE_SIZE; + insert_mmap(context, mm); + } return &context->ibucontext; +err_mm: + kfree(mm); +err_free: + kfree(context); +err: + return ERR_PTR(ret); } static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) @@ -162,8 +204,14 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) */ if (addr >= rdev->oc_mw_pa) vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); - else - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + else { + if (is_t5(rdev->lldi.adapter_type)) + vma->vm_page_prot = + t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = + pgprot_noncached(vma->vm_page_prot); + } ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, len, vma->vm_page_prot); @@ -263,7 +311,7 @@ static int c4iw_query_device(struct ib_device *ibdev, dev = to_c4iw_dev(ibdev); memset(props, 0, sizeof *props); memcpy(&props->sys_image_guid, dev->rdev.lldi.ports[0]->dev_addr, 6); - props->hw_ver = dev->rdev.lldi.adapter_type; + props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type); props->fw_ver = dev->rdev.lldi.fw_vers; props->device_cap_flags = dev->device_cap_flags; props->page_size_cap = T4_PAGESIZE_MASK; @@ -281,7 +329,7 @@ static int c4iw_query_device(struct ib_device *ibdev, props->max_mr = c4iw_num_stags(&dev->rdev); props->max_pd = T4_MAX_NUM_PD; props->local_ca_ack_delay = 0; - props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH; + props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl); return 0; } @@ -346,7 +394,8 @@ static ssize_t show_rev(struct device *dev, struct device_attribute *attr, struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev, ibdev.dev); PDBG("%s dev 0x%p\n", __func__, dev); - return sprintf(buf, "%d\n", c4iw_dev->rdev.lldi.adapter_type); + return sprintf(buf, "%d\n", + CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type)); } static ssize_t show_fw_ver(struct device *dev, struct device_attribute *attr, @@ -451,7 +500,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.node_type = RDMA_NODE_RNIC; memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev); dev->ibdev.query_device = c4iw_query_device; dev->ibdev.query_port = c4iw_query_port; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 05bfe53bff6..086f62f5dc9 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -42,10 +42,21 @@ static int ocqp_support = 1; module_param(ocqp_support, int, 0644); MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); -int db_fc_threshold = 2000; +int db_fc_threshold = 1000; module_param(db_fc_threshold, int, 0644); -MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic " - "db flow control mode (default = 2000)"); +MODULE_PARM_DESC(db_fc_threshold, + "QP count/threshold that triggers" + " automatic db flow control mode (default = 1000)"); + +int db_coalescing_threshold; +module_param(db_coalescing_threshold, int, 0644); +MODULE_PARM_DESC(db_coalescing_threshold, + "QP count/threshold that triggers" + " disabling db coalescing (default = 0)"); + +static int max_fr_immd = T4_MAX_FR_IMMD; +module_param(max_fr_immd, int, 0644); +MODULE_PARM_DESC(max_fr_immd, "fastreg threshold for using DSGL instead of immedate"); static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) { @@ -76,7 +87,7 @@ static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) static int alloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) { - if (!ocqp_support || !t4_ocqp_supported()) + if (!ocqp_support || !ocqp_supported(&rdev->lldi)) return -ENOSYS; sq->dma_addr = c4iw_ocqp_pool_alloc(rdev, sq->memsize); if (!sq->dma_addr) @@ -100,6 +111,16 @@ static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq) return 0; } +static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) +{ + int ret = -ENOSYS; + if (user) + ret = alloc_oc_sq(rdev, sq); + if (ret) + ret = alloc_host_sq(rdev, sq); + return ret; +} + static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct c4iw_dev_ucontext *uctx) { @@ -129,7 +150,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, int wr_len; struct c4iw_wr_wait wr_wait; struct sk_buff *skb; - int ret; + int ret = 0; int eqsize; wq->sq.qid = c4iw_get_qpid(rdev, uctx); @@ -168,26 +189,19 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, goto free_sw_rq; } - if (user) { - ret = alloc_oc_sq(rdev, &wq->sq); - if (ret) - goto free_hwaddr; - - ret = alloc_host_sq(rdev, &wq->sq); - if (ret) - goto free_sq; - } else - ret = alloc_host_sq(rdev, &wq->sq); - if (ret) - goto free_hwaddr; + ret = alloc_sq(rdev, &wq->sq, user); + if (ret) + goto free_hwaddr; memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, &(wq->rq.dma_addr), GFP_KERNEL); - if (!wq->rq.queue) + if (!wq->rq.queue) { + ret = -ENOMEM; goto free_sq; + } PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", __func__, wq->sq.queue, (unsigned long long)virt_to_phys(wq->sq.queue), @@ -198,13 +212,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wq->db = rdev->lldi.db_reg; wq->gts = rdev->lldi.gts_reg; - if (user) { - wq->sq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->sq.qid << rdev->qpshift); - wq->sq.udb &= PAGE_MASK; - wq->rq.udb = (u64)pci_resource_start(rdev->lldi.pdev, 2) + - (wq->rq.qid << rdev->qpshift); - wq->rq.udb &= PAGE_MASK; + if (user || is_t5(rdev->lldi.adapter_type)) { + u32 off; + + off = (wq->sq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->sq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->sq.qid & rdev->qpmask) + 8; + wq->sq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } + off = (wq->rq.qid << rdev->qpshift) & PAGE_MASK; + if (user) { + wq->rq.udb = (u64 __iomem *)(rdev->bar2_pa + off); + } else { + off += 128 * (wq->rq.qid & rdev->qpmask) + 8; + wq->rq.udb = (u64 __iomem *)(rdev->bar2_kva + off); + } } wq->rdev = rdev; wq->rq.msn = 1; @@ -285,9 +309,10 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (ret) goto free_dma; - PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", + PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%lx rqudb 0x%lx\n", __func__, wq->sq.qid, wq->rq.qid, wq->db, - (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); + (__force unsigned long) wq->sq.udb, + (__force unsigned long) wq->rq.udb); return 0; free_dma: @@ -411,6 +436,8 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, default: return -EINVAL; } + wqe->send.r3 = 0; + wqe->send.r4 = 0; plen = 0; if (wr->num_sge) { @@ -532,7 +559,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, } static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + struct ib_send_wr *wr, u8 *len16, u8 t5dev) { struct fw_ri_immd *imdp; @@ -541,7 +568,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); int rem; - if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) + if (wr->wr.fast_reg.page_list_len > + t4_max_fr_depth(use_dsgl)) return -EINVAL; wqe->fr.qpbinde_to_dcacpu = 0; @@ -554,28 +582,51 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); - WARN_ON(pbllen > T4_MAX_FR_IMMD); - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - rem = pbllen; - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { - *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; - } - BUG_ON(rem < 0); - while (rem) { - *p = 0; - rem -= sizeof *p; - if (++p == (__be64 *)&sq->queue[sq->size]) - p = (__be64 *)sq->queue; + + if (t5dev && use_dsgl && (pbllen > max_fr_immd)) { + struct c4iw_fr_page_list *c4pl = + to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); + struct fw_ri_dsgl *sglp; + + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + wr->wr.fast_reg.page_list->page_list[i] = (__force u64) + cpu_to_be64((u64) + wr->wr.fast_reg.page_list->page_list[i]); + } + + sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); + sglp->op = FW_RI_DATA_DSGL; + sglp->r1 = 0; + sglp->nsge = cpu_to_be16(1); + sglp->addr0 = cpu_to_be64(c4pl->dma_addr); + sglp->len0 = cpu_to_be32(pbllen); + + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16); + } else { + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64( + (u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof(*p); + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*imdp) + + pbllen, 16); } - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } @@ -601,6 +652,48 @@ void c4iw_qp_rem_ref(struct ib_qp *qp) wake_up(&(to_c4iw_qp(qp)->wait)); } +static void add_to_fc_list(struct list_head *head, struct list_head *entry) +{ + if (list_empty(entry)) + list_add_tail(entry, head); +} + +static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_sq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.sq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + +static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc) +{ + unsigned long flags; + + spin_lock_irqsave(&qhp->rhp->lock, flags); + spin_lock(&qhp->lock); + if (qhp->rhp->db_state == NORMAL) + t4_ring_rq_db(&qhp->wq, inc, + is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL); + else { + add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry); + qhp->wq.rq.wq_pidx_inc += inc; + } + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&qhp->rhp->lock, flags); + return 0; +} + int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -609,7 +702,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; - union t4_wr *wqe; + union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; unsigned long flag; @@ -638,7 +731,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -676,7 +769,10 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16, + is_t5( + qhp->rhp->rdev.lldi.adapter_type) ? + 1 : 0); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) @@ -696,7 +792,9 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); @@ -709,9 +807,14 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_sq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_sq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_sq_db(qhp, idx); + } return err; } @@ -720,7 +823,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, { int err = 0; struct c4iw_qp *qhp; - union t4_recv_wr *wqe; + union t4_recv_wr *wqe = NULL; u32 num_wrs; u8 len16 = 0; unsigned long flag; @@ -771,9 +874,14 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, wr = wr->next; num_wrs--; } - if (t4_wq_db_enabled(&qhp->wq)) - t4_ring_rq_db(&qhp->wq, idx); - spin_unlock_irqrestore(&qhp->lock, flag); + if (!qhp->rhp->rdev.status_page->db_off) { + t4_ring_rq_db(&qhp->wq, idx, + is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe); + spin_unlock_irqrestore(&qhp->lock, flag); + } else { + spin_unlock_irqrestore(&qhp->lock, flag); + ring_kernel_rq_db(qhp, idx); + } return err; } @@ -966,7 +1074,15 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&rchp->cq); + + if (qhp->wq.flushed) { + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); + return; + } + qhp->wq.flushed = 1; + + c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); @@ -980,9 +1096,9 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); - c4iw_flush_hw_cq(&schp->cq); - c4iw_count_scqes(&schp->cq, &qhp->wq, &count); - flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); + if (schp != rchp) + c4iw_flush_hw_cq(schp); + flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); if (flushed) { @@ -997,11 +1113,11 @@ static void flush_qp(struct c4iw_qp *qhp) struct c4iw_cq *rchp, *schp; unsigned long flag; - rchp = get_chp(qhp->rhp, qhp->attr.rcq); - schp = get_chp(qhp->rhp, qhp->attr.scq); + rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + schp = to_c4iw_cq(qhp->ibqp.send_cq); + t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1151,35 +1267,6 @@ out: return ret; } -/* - * Called by the library when the qp has user dbs disabled due to - * a DB_FULL condition. This function will single-thread all user - * DB rings to avoid overflowing the hw db-fifo. - */ -static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc) -{ - int delay = db_delay_usecs; - - mutex_lock(&qhp->rhp->db_mutex); - do { - - /* - * The interrupt threshold is dbfifo_int_thresh << 6. So - * make sure we don't cross that and generate an interrupt. - */ - if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < - (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) { - writel(QID(qid) | PIDX(inc), qhp->wq.db); - break; - } - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_timeout(usecs_to_jiffies(delay)); - delay = min(delay << 1, 2000); - } while (1); - mutex_unlock(&qhp->rhp->db_mutex); - return 0; -} - int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, enum c4iw_qp_attr_mask mask, struct c4iw_qp_attributes *attrs, @@ -1229,11 +1316,11 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } if (mask & C4IW_QP_ATTR_SQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc); + ret = ring_kernel_sq_db(qhp, attrs->sq_db_inc); goto out; } if (mask & C4IW_QP_ATTR_RQ_DB) { - ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc); + ret = ring_kernel_rq_db(qhp, attrs->rq_db_inc); goto out; } @@ -1283,6 +1370,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1290,28 +1378,30 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, disconnect = 1; c4iw_get_ep(&qhp->ep->com); } - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; - if (!internal) + if (!internal) { + c4iw_get_ep(&qhp->ep->com); terminate = 1; - disconnect = 1; - c4iw_get_ep(&qhp->ep->com); + disconnect = 1; + } else { + terminate = qhp->attr.send_term; + ret = rdma_fini(rhp, qhp, ep); + if (ret) + goto err; + } break; case C4IW_QP_STATE_ERROR: + t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_ERROR); - if (qhp->ibqp.uobject) - t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; @@ -1383,6 +1473,7 @@ err: qhp->ep = NULL; set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; + abort = 1; wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); @@ -1413,14 +1504,6 @@ out: return ret; } -static int enable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_enable_wq_db(&qp->wq); - return 0; -} - int c4iw_destroy_qp(struct ib_qp *ib_qp) { struct c4iw_dev *rhp; @@ -1438,19 +1521,15 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); wait_event(qhp->wait, !qhp->ep); - spin_lock_irq(&rhp->lock); - remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid); - rhp->qpcnt--; - BUG_ON(rhp->qpcnt < 0); - if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = NORMAL; - idr_for_each(&rhp->qpidr, enable_qp_db, NULL); - } - spin_unlock_irq(&rhp->lock); + remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); atomic_dec(&qhp->refcnt); wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); + spin_lock_irq(&rhp->lock); + if (!list_empty(&qhp->db_fc_entry)) + list_del_init(&qhp->db_fc_entry); + spin_unlock_irq(&rhp->lock); + ucontext = ib_qp->uobject ? to_c4iw_ucontext(ib_qp->uobject->context) : NULL; destroy_qp(&rhp->rdev, &qhp->wq, @@ -1461,14 +1540,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp) return 0; } -static int disable_qp_db(int id, void *p, void *data) -{ - struct c4iw_qp *qp = p; - - t4_disable_wq_db(&qp->wq); - return 0; -} - struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { @@ -1478,7 +1549,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; @@ -1508,12 +1579,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; - qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) return ERR_PTR(-ENOMEM); qhp->wq.sq.size = sqsize; qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue; + qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.size = rqsize; qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue; @@ -1550,21 +1621,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); atomic_set(&qhp->refcnt, 1); - spin_lock_irq(&rhp->lock); - if (rhp->db_state != NORMAL) - t4_disable_wq_db(&qhp->wq); - if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) { - rhp->rdev.stats.db_state_transitions++; - rhp->db_state = FLOW_CONTROL; - idr_for_each(&rhp->qpidr, disable_qp_db, NULL); - } - ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); - spin_unlock_irq(&rhp->lock); + ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid); if (ret) goto err2; @@ -1609,6 +1672,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (mm5) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; + } else { + uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; @@ -1631,11 +1696,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize); insert_mmap(ucontext, mm2); mm3->key = uresp.sq_db_gts_key; - mm3->addr = qhp->wq.sq.udb; + mm3->addr = (__force unsigned long) qhp->wq.sq.udb; mm3->len = PAGE_SIZE; insert_mmap(ucontext, mm3); mm4->key = uresp.rq_db_gts_key; - mm4->addr = qhp->wq.rq.udb; + mm4->addr = (__force unsigned long) qhp->wq.rq.udb; mm4->len = PAGE_SIZE; insert_mmap(ucontext, mm4); if (mm5) { @@ -1648,6 +1713,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, } qhp->ibqp.qp_num = qhp->wq.sq.qid; init_timer(&(qhp->timer)); + INIT_LIST_HEAD(&qhp->db_fc_entry); PDBG("%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x\n", __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, qhp->wq.sq.qid); @@ -1711,11 +1777,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, /* * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for * ringing the queue db when we're in DB_FULL mode. + * Only allow this on T4 devices. */ attrs.sq_db_inc = attr->sq_psn; attrs.rq_db_inc = attr->rq_psn; mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0; mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0; + if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) && + (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB))) + return -EINVAL; return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); } diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index cdef4d7fb6d..67df71a7012 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -179,8 +179,12 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) kfree(entry); } else { qid = c4iw_get_resource(&rdev->resource.qid_table); - if (!qid) + if (!qid) { + mutex_lock(&rdev->stats.lock); + rdev->stats.qid.fail++; + mutex_unlock(&rdev->stats.lock); goto out; + } mutex_lock(&rdev->stats.lock); rdev->stats.qid.cur += rdev->qpmask + 1; mutex_unlock(&rdev->stats.lock); @@ -322,8 +326,8 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size) unsigned long addr = gen_pool_alloc(rdev->rqt_pool, size << 6); PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size << 6); if (!addr) - printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", - pci_name(rdev->lldi.pdev)); + pr_warn_ratelimited(MOD "%s: Out of RQT memory\n", + pci_name(rdev->lldi.pdev)); mutex_lock(&rdev->stats.lock); if (addr) { rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT); diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 16f26ab2930..68b0a6bf4eb 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -36,9 +36,9 @@ #include "t4_msg.h" #include "t4fw_ri_api.h" -#define T4_MAX_NUM_QP (1<<16) -#define T4_MAX_NUM_CQ (1<<15) -#define T4_MAX_NUM_PD (1<<15) +#define T4_MAX_NUM_QP 65536 +#define T4_MAX_NUM_CQ 65536 +#define T4_MAX_NUM_PD 65536 #define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1) #define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES) #define T4_MAX_IQ_SIZE (65520 - 1) @@ -47,7 +47,7 @@ #define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1) #define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1) #define T4_MAX_NUM_STAG (1<<15) -#define T4_MAX_MR_SIZE (~0ULL - 1) +#define T4_MAX_MR_SIZE (~0ULL) #define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */ #define T4_STAG_UNSET 0xffffffff #define T4_FW_MAJ 0 @@ -84,7 +84,14 @@ struct t4_status_page { sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ sizeof(struct fw_ri_immd)) & ~31UL) -#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_IMMD_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) +#define T4_MAX_FR_DSGL 1024 +#define T4_MAX_FR_DSGL_DEPTH (T4_MAX_FR_DSGL / sizeof(u64)) + +static inline int t4_max_fr_depth(int use_dsgl) +{ + return use_dsgl ? T4_MAX_FR_DSGL_DEPTH : T4_MAX_FR_IMMD_DEPTH; +} #define T4_RQ_NUM_SLOTS 2 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) @@ -269,6 +276,7 @@ struct t4_swsqe { int complete; int signaled; u16 idx; + int flushed; }; static inline pgprot_t t4_pgprot_wc(pgprot_t prot) @@ -280,15 +288,6 @@ static inline pgprot_t t4_pgprot_wc(pgprot_t prot) #endif } -static inline int t4_ocqp_supported(void) -{ -#if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) - return 1; -#else - return 0; -#endif -} - enum { T4_SQ_ONCHIP = (1<<0), }; @@ -300,7 +299,7 @@ struct t4_sq { unsigned long phys_addr; struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; - u64 udb; + u64 __iomem *udb; size_t memsize; u32 qid; u16 in_use; @@ -308,7 +307,9 @@ struct t4_sq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; u16 flags; + short flush_cidx; }; struct t4_swrqe { @@ -320,7 +321,7 @@ struct t4_rq { dma_addr_t dma_addr; DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; - u64 udb; + u64 __iomem *udb; size_t memsize; u32 qid; u32 msn; @@ -331,6 +332,7 @@ struct t4_rq { u16 cidx; u16 pidx; u16 wq_pidx; + u16 wq_pidx_inc; }; struct t4_wq { @@ -339,6 +341,7 @@ struct t4_wq { void __iomem *db; void __iomem *gts; struct c4iw_rdev *rdev; + int flushed; }; static inline int t4_rqes_posted(struct t4_wq *wq) @@ -421,6 +424,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 len16) static inline void t4_sq_consume(struct t4_wq *wq) { + BUG_ON(wq->sq.in_use < 1); + if (wq->sq.cidx == wq->sq.flush_cidx) + wq->sq.flush_cidx = -1; wq->sq.in_use--; if (++wq->sq.cidx == wq->sq.size) wq->sq.cidx = 0; @@ -436,15 +442,67 @@ static inline u16 t4_sq_wq_size(struct t4_wq *wq) return wq->sq.size * T4_SQ_NUM_SLOTS; } -static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) +/* This function copies 64 byte coalesced work request to memory + * mapped BAR2 space. For coalesced WRs, the SGE fetches data + * from the FIFO instead of from Host. + */ +static inline void pio_copy(u64 __iomem *dst, u64 *src) +{ + int count = 8; + + while (count) { + writeq(*src, dst); + src++; + dst++; + count--; + } +} + +static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_wr *wqe) { + + /* Flush host queue memory writes. */ wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + pio_copy(wq->sq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->sq.pidx = %d\n", + __func__, wq->sq.pidx); + writel(PIDX_T5(inc), wq->sq.udb); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } writel(QID(wq->sq.qid) | PIDX(inc), wq->db); } -static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc) +static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5, + union t4_recv_wr *wqe) { + + /* Flush host queue memory writes. */ wmb(); + if (t5) { + if (inc == 1 && wqe) { + PDBG("%s: WC wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + pio_copy(wq->rq.udb + 7, (void *)wqe); + } else { + PDBG("%s: DB wq->rq.pidx = %d\n", + __func__, wq->rq.pidx); + writel(PIDX_T5(inc), wq->rq.udb); + } + + /* Flush user doorbell area writes. */ + wmb(); + return; + } writel(QID(wq->rq.qid) | PIDX(inc), wq->db); } @@ -484,6 +542,7 @@ struct t4_cq { size_t memsize; __be64 bits_type_ts; u32 cqid; + int vector; u16 size; /* including status page */ u16 cidx; u16 sw_pidx; @@ -514,12 +573,18 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) static inline void t4_swcq_produce(struct t4_cq *cq) { cq->sw_in_use++; + if (cq->sw_in_use == cq->size) { + PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + } if (++cq->sw_pidx == cq->size) cq->sw_pidx = 0; } static inline void t4_swcq_consume(struct t4_cq *cq) { + BUG_ON(cq->sw_in_use < 1); cq->sw_in_use--; if (++cq->sw_cidx == cq->size) cq->sw_cidx = 0; @@ -528,7 +593,7 @@ static inline void t4_swcq_consume(struct t4_cq *cq) static inline void t4_hwcq_consume(struct t4_cq *cq) { cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts; - if (++cq->cidx_inc == (cq->size >> 4)) { + if (++cq->cidx_inc == (cq->size >> 4) || cq->cidx_inc == CIDXINC_MASK) { u32 val; val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) | @@ -561,7 +626,11 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) ret = -EOVERFLOW; cq->error = 1; printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); + BUG_ON(1); } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { + + /* Ensure CQE is flushed to memory */ + rmb(); *cqe = &cq->queue[cq->cidx]; ret = 0; } else @@ -571,6 +640,12 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq) { + if (cq->sw_in_use == cq->size) { + PDBG("%s cxgb4 sw cq overflow cqid %u\n", __func__, cq->cqid); + cq->error = 1; + BUG_ON(1); + return NULL; + } if (cq->sw_in_use) return &cq->sw_queue[cq->sw_cidx]; return NULL; @@ -599,3 +674,7 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq) ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; } #endif + +struct t4_dev_status_page { + u8 db_off; +}; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index dc193c29267..91289a051af 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -836,4 +836,19 @@ struct ulptx_idata { #define V_RX_DACK_CHANGE(x) ((x) << S_RX_DACK_CHANGE) #define F_RX_DACK_CHANGE V_RX_DACK_CHANGE(1U) +enum { /* TCP congestion control algorithms */ + CONG_ALG_RENO, + CONG_ALG_TAHOE, + CONG_ALG_NEWRENO, + CONG_ALG_HIGHSPEED +}; + +#define S_CONG_CNTRL 14 +#define M_CONG_CNTRL 0x3 +#define V_CONG_CNTRL(x) ((x) << S_CONG_CNTRL) +#define G_CONG_CNTRL(x) (((x) >> S_CONG_CNTRL) & M_CONG_CNTRL) + +#define CONG_CNTRL_VALID (1 << 18) +#define T5_OPT_2_VALID (1 << 31) + #endif /* _T4FW_RI_API_H_ */ diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h index 32b754c35ab..cbd0ce17072 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/cxgb4/user.h @@ -48,6 +48,7 @@ struct c4iw_create_cq_resp { __u32 cqid; __u32 size; __u32 qid_mask; + __u32 reserved; /* explicit padding (optional for i386) */ }; @@ -70,4 +71,10 @@ struct c4iw_create_qp_resp { __u32 qid_mask; __u32 flags; }; + +struct c4iw_alloc_ucontext_resp { + __u64 status_page_key; + __u32 status_page_size; + __u32 reserved; /* explicit padding (optional for i386) */ +}; #endif |
