diff options
author | kxie@chelsio.com <kxie@chelsio.com> | 2010-08-16 20:55:53 -0700 |
---|---|---|
committer | James Bottomley <James.Bottomley@suse.de> | 2010-09-05 14:28:35 -0300 |
commit | 9ba682f01e2ffe47e6ea47fcc6cdfe39d7a71571 (patch) | |
tree | 6e46beeb7796e5ed61cf5b70334763d533197469 /drivers/scsi/cxgbi | |
parent | ec21b3b0dbbaf5965f6b508cb6b48d9fe5bb6ab5 (diff) |
[SCSI] libcxgbi: common library for cxgb3i and cxgb4i
[PATCH v5 1/3] libcxgbi: common library for cxgb3i and cxgb4i
From: Karen Xie <kxie@chelsio.com>
Extracts common functions to libcxgbi.
Signed-off-by: Karen Xie <kxie@chelsio.com>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/cxgbi')
-rw-r--r-- | drivers/scsi/cxgbi/libcxgbi.c | 2610 | ||||
-rw-r--r-- | drivers/scsi/cxgbi/libcxgbi.h | 753 |
2 files changed, 3363 insertions, 0 deletions
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c new file mode 100644 index 00000000000..db9d08a831d --- /dev/null +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -0,0 +1,2610 @@ +/* + * libcxgbi.c: Chelsio common library for T3/T4 iSCSI driver. + * + * Copyright (c) 2010 Chelsio Communications, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written by: Karen Xie (kxie@chelsio.com) + * Written by: Rakesh Ranjan (rranjan@chelsio.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ + +#include <linux/skbuff.h> +#include <linux/crypto.h> +#include <linux/scatterlist.h> +#include <linux/pci.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_host.h> +#include <linux/if_vlan.h> +#include <linux/inet.h> +#include <net/dst.h> +#include <net/route.h> +#include <linux/inetdevice.h> /* ip_dev_find */ +#include <net/tcp.h> + +static unsigned int dbg_level; + +#include "libcxgbi.h" + +#define DRV_MODULE_NAME "libcxgbi" +#define DRV_MODULE_DESC "Chelsio iSCSI driver library" +#define DRV_MODULE_VERSION "0.9.0" +#define DRV_MODULE_RELDATE "Jun. 2010" + +MODULE_AUTHOR("Chelsio Communications, Inc."); +MODULE_DESCRIPTION(DRV_MODULE_DESC); +MODULE_VERSION(DRV_MODULE_VERSION); +MODULE_LICENSE("GPL"); + +module_param(dbg_level, uint, 0644); +MODULE_PARM_DESC(dbg_level, "libiscsi debug level (default=0)"); + + +/* + * cxgbi device management + * maintains a list of the cxgbi devices + */ +static LIST_HEAD(cdev_list); +static DEFINE_MUTEX(cdev_mutex); + +int cxgbi_device_portmap_create(struct cxgbi_device *cdev, unsigned int base, + unsigned int max_conn) +{ + struct cxgbi_ports_map *pmap = &cdev->pmap; + + pmap->port_csk = cxgbi_alloc_big_mem(max_conn * + sizeof(struct cxgbi_sock *), + GFP_KERNEL); + if (!pmap->port_csk) { + pr_warn("cdev 0x%p, portmap OOM %u.\n", cdev, max_conn); + return -ENOMEM; + } + + pmap->max_connect = max_conn; + pmap->sport_base = base; + spin_lock_init(&pmap->lock); + return 0; +} +EXPORT_SYMBOL_GPL(cxgbi_device_portmap_create); + +void cxgbi_device_portmap_cleanup(struct cxgbi_device *cdev) +{ + struct cxgbi_ports_map *pmap = &cdev->pmap; + struct cxgbi_sock *csk; + int i; + + for (i = 0; i < pmap->max_connect; i++) { + if (pmap->port_csk[i]) { + csk = pmap->port_csk[i]; + pmap->port_csk[i] = NULL; + log_debug(1 << CXGBI_DBG_SOCK, + "csk 0x%p, cdev 0x%p, offload down.\n", + csk, cdev); + spin_lock_bh(&csk->lock); + cxgbi_sock_set_flag(csk, CTPF_OFFLOAD_DOWN); + cxgbi_sock_closed(csk); + spin_unlock_bh(&csk->lock); + cxgbi_sock_put(csk); + } + } +} +EXPORT_SYMBOL_GPL(cxgbi_device_portmap_cleanup); + +static inline void cxgbi_device_destroy(struct cxgbi_device *cdev) +{ + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p# %u.\n", cdev, cdev->nports); + cxgbi_hbas_remove(cdev); + cxgbi_device_portmap_cleanup(cdev); + if (cdev->dev_ddp_cleanup) + cdev->dev_ddp_cleanup(cdev); + else + cxgbi_ddp_cleanup(cdev); + if (cdev->ddp) + cxgbi_ddp_cleanup(cdev); + if (cdev->pmap.max_connect) + cxgbi_free_big_mem(cdev->pmap.port_csk); + kfree(cdev); +} + +struct cxgbi_device *cxgbi_device_register(unsigned int extra, + unsigned int nports) +{ + struct cxgbi_device *cdev; + + cdev = kzalloc(sizeof(*cdev) + extra + nports * + (sizeof(struct cxgbi_hba *) + + sizeof(struct net_device *)), + GFP_KERNEL); + if (!cdev) { + pr_warn("nport %d, OOM.\n", nports); + return NULL; + } + cdev->ports = (struct net_device **)(cdev + 1); + cdev->hbas = (struct cxgbi_hba **)(((char*)cdev->ports) + nports * + sizeof(struct net_device *)); + if (extra) + cdev->dd_data = ((char *)cdev->hbas) + + nports * sizeof(struct cxgbi_hba *); + spin_lock_init(&cdev->pmap.lock); + + mutex_lock(&cdev_mutex); + list_add_tail(&cdev->list_head, &cdev_list); + mutex_unlock(&cdev_mutex); + + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p# %u.\n", cdev, nports); + return cdev; +} +EXPORT_SYMBOL_GPL(cxgbi_device_register); + +void cxgbi_device_unregister(struct cxgbi_device *cdev) +{ + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p# %u,%s.\n", + cdev, cdev->nports, cdev->nports ? cdev->ports[0]->name : ""); + mutex_lock(&cdev_mutex); + list_del(&cdev->list_head); + mutex_unlock(&cdev_mutex); + cxgbi_device_destroy(cdev); +} +EXPORT_SYMBOL_GPL(cxgbi_device_unregister); + +void cxgbi_device_unregister_all(unsigned int flag) +{ + struct cxgbi_device *cdev, *tmp; + + mutex_lock(&cdev_mutex); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) { + if ((cdev->flags & flag) == flag) { + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p# %u,%s.\n", + cdev, cdev->nports, cdev->nports ? + cdev->ports[0]->name : ""); + list_del(&cdev->list_head); + cxgbi_device_destroy(cdev); + } + } + mutex_unlock(&cdev_mutex); +} +EXPORT_SYMBOL_GPL(cxgbi_device_unregister_all); + +struct cxgbi_device *cxgbi_device_find_by_lldev(void *lldev) +{ + struct cxgbi_device *cdev, *tmp; + + mutex_lock(&cdev_mutex); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) { + if (cdev->lldev == lldev) { + mutex_unlock(&cdev_mutex); + return cdev; + } + } + mutex_unlock(&cdev_mutex); + log_debug(1 << CXGBI_DBG_DEV, + "lldev 0x%p, NO match found.\n", lldev); + return NULL; +} +EXPORT_SYMBOL_GPL(cxgbi_device_find_by_lldev); + +static struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev, + int *port) +{ + struct cxgbi_device *cdev, *tmp; + int i; + + if (ndev->priv_flags & IFF_802_1Q_VLAN) + ndev = vlan_dev_real_dev(ndev); + + mutex_lock(&cdev_mutex); + list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) { + for (i = 0; i < cdev->nports; i++) { + if (ndev == cdev->ports[i]) { + mutex_unlock(&cdev_mutex); + if (port) + *port = i; + return cdev; + } + } + } + mutex_unlock(&cdev_mutex); + log_debug(1 << CXGBI_DBG_DEV, + "ndev 0x%p, %s, NO match found.\n", ndev, ndev->name); + return NULL; +} + +struct cxgbi_hba *cxgbi_hba_find_by_netdev(struct net_device *dev, + struct cxgbi_device *cdev) +{ + int i; + + if (dev->priv_flags & IFF_802_1Q_VLAN) + dev = vlan_dev_real_dev(dev); + + for (i = 0; i < cdev->nports; i++) { + if (cdev->hbas[i]->ndev == dev) + return cdev->hbas[i]; + } + log_debug(1 << CXGBI_DBG_DEV, + "ndev 0x%p, %s, cdev 0x%p, NO match found.\n", + dev, dev->name, cdev); + return NULL; +} + +void cxgbi_hbas_remove(struct cxgbi_device *cdev) +{ + int i; + struct cxgbi_hba *chba; + + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p#%u.\n", cdev, cdev->nports); + + for (i = 0; i < cdev->nports; i++) { + chba = cdev->hbas[i]; + if (chba) { + cdev->hbas[i] = NULL; + iscsi_host_remove(chba->shost); + pci_dev_put(cdev->pdev); + iscsi_host_free(chba->shost); + } + } +} +EXPORT_SYMBOL_GPL(cxgbi_hbas_remove); + +int cxgbi_hbas_add(struct cxgbi_device *cdev, unsigned int max_lun, + unsigned int max_id, struct scsi_host_template *sht, + struct scsi_transport_template *stt) +{ + struct cxgbi_hba *chba; + struct Scsi_Host *shost; + int i, err; + + log_debug(1 << CXGBI_DBG_DEV, "cdev 0x%p, p#%u.\n", cdev, cdev->nports); + + for (i = 0; i < cdev->nports; i++) { + shost = iscsi_host_alloc(sht, sizeof(*chba), 1); + if (!shost) { + pr_info("0x%p, p%d, %s, host alloc failed.\n", + cdev, i, cdev->ports[i]->name); + err = -ENOMEM; + goto err_out; + } + + shost->transportt = stt; + shost->max_lun = max_lun; + shost->max_id = max_id; + shost->max_channel = 0; + shost->max_cmd_len = 16; + + chba = iscsi_host_priv(shost); + chba->cdev = cdev; + chba->ndev = cdev->ports[i]; + chba->shost = shost; + + log_debug(1 << CXGBI_DBG_DEV, + "cdev 0x%p, p#%d %s: chba 0x%p.\n", + cdev, i, cdev->ports[i]->name, chba); + + pci_dev_get(cdev->pdev); + err = iscsi_host_add(shost, &cdev->pdev->dev); + if (err) { + pr_info("cdev 0x%p, p#%d %s, host add failed.\n", + cdev, i, cdev->ports[i]->name); + pci_dev_put(cdev->pdev); + scsi_host_put(shost); + goto err_out; + } + + cdev->hbas[i] = chba; + } + + return 0; + +err_out: + cxgbi_hbas_remove(cdev); + return err; +} +EXPORT_SYMBOL_GPL(cxgbi_hbas_add); + +/* + * iSCSI offload + * + * - source port management + * To find a free source port in the port allocation map we use a very simple + * rotor scheme to look for the next free port. + * + * If a source port has been specified make sure that it doesn't collide with + * our normal source port allocation map. If it's outside the range of our + * allocation/deallocation scheme just let them use it. + * + * If the source port is outside our allocation range, the caller is + * responsible for keeping track of their port usage. + */ +static int sock_get_port(struct cxgbi_sock *csk) +{ + struct cxgbi_device *cdev = csk->cdev; + struct cxgbi_ports_map *pmap = &cdev->pmap; + unsigned int start; + int idx; + + if (!pmap->max_connect) { + pr_err("cdev 0x%p, p#%u %s, NO port map.\n", + cdev, csk->port_id, cdev->ports[csk->port_id]->name); + return -EADDRNOTAVAIL; + } + + if (csk->saddr.sin_port) { + pr_err("source port NON-ZERO %u.\n", + ntohs(csk->saddr.sin_port)); + return -EADDRINUSE; + } + + spin_lock_bh(&pmap->lock); + if (pmap->used >= pmap->max_connect) { + spin_unlock_bh(&pmap->lock); + pr_info("cdev 0x%p, p#%u %s, ALL ports used.\n", + cdev, csk->port_id, cdev->ports[csk->port_id]->name); + return -EADDRNOTAVAIL; + } + + start = idx = pmap->next; + do { + if (++idx >= pmap->max_connect) + idx = 0; + if (!pmap->port_csk[idx]) { + pmap->used++; + csk->saddr.sin_port = + htons(pmap->sport_base + idx); + pmap->next = idx; + pmap->port_csk[idx] = csk; + spin_unlock_bh(&pmap->lock); + cxgbi_sock_get(csk); + log_debug(1 << CXGBI_DBG_SOCK, + "cdev 0x%p, p#%u %s, p %u, %u.\n", + cdev, csk->port_id, + cdev->ports[csk->port_id]->name, + pmap->sport_base + idx, pmap->next); + return 0; + } + } while (idx != start); + spin_unlock_bh(&pmap->lock); + + /* should not happen */ + pr_warn("cdev 0x%p, p#%u %s, next %u?\n", + cdev, csk->port_id, cdev->ports[csk->port_id]->name, + pmap->next); + return -EADDRNOTAVAIL; +} + +static void sock_put_port(struct cxgbi_sock *csk) +{ + struct cxgbi_device *cdev = csk->cdev; + struct cxgbi_ports_map *pmap = &cdev->pmap; + + if (csk->saddr.sin_port) { + int idx = ntohs(csk->saddr.sin_port) - pmap->sport_base; + + csk->saddr.sin_port = 0; + if (idx < 0 || idx >= pmap->max_connect) { + pr_err("cdev 0x%p, p#%u %s, port %u OOR.\n", + cdev, csk->port_id, + cdev->ports[csk->port_id]->name, + ntohs(csk->saddr.sin_port)); + return; + } + + spin_lock_bh(&pmap->lock); + pmap->port_csk[idx] = NULL; + pmap->used--; + spin_unlock_bh(&pmap->lock); + + log_debug(1 << CXGBI_DBG_SOCK, + "cdev 0x%p, p#%u %s, release %u.\n", + cdev, csk->port_id, cdev->ports[csk->port_id]->name, + pmap->sport_base + idx); + + cxgbi_sock_put(csk); + } +} + +/* + * iscsi tcp connection + */ +void cxgbi_sock_free_cpl_skbs(struct cxgbi_sock *csk) +{ + if (csk->cpl_close) { + kfree_skb(csk->cpl_close); + csk->cpl_close = NULL; + } + if (csk->cpl_abort_req) { + kfree_skb(csk->cpl_abort_req); + csk->cpl_abort_req = NULL; + } + if (csk->cpl_abort_rpl) { + kfree_skb(csk->cpl_abort_rpl); + csk->cpl_abort_rpl = NULL; + } +} +EXPORT_SYMBOL_GPL(cxgbi_sock_free_cpl_skbs); + +static struct cxgbi_sock *cxgbi_sock_create(struct cxgbi_device *cdev) +{ + struct cxgbi_sock *csk = kzalloc(sizeof(*csk), GFP_NOIO); + + if (!csk) { + pr_info("alloc csk %zu failed.\n", sizeof(*csk)); + return NULL; + } + + if (cdev->csk_alloc_cpls(csk) < 0) { + pr_info("csk 0x%p, alloc cpls failed.\n", csk); + kfree(csk); + return NULL; + } + + spin_lock_init(&csk->lock); + kref_init(&csk->refcnt); + skb_queue_head_init(&csk->receive_queue); + skb_queue_head_init(&csk->write_queue); + setup_timer(&csk->retry_timer, NULL, (unsigned long)csk); + rwlock_init(&csk->callback_lock); + csk->cdev = cdev; + csk->flags = 0; + cxgbi_sock_set_state(csk, CTP_CLOSED); + + log_debug(1 << CXGBI_DBG_SOCK, "cdev 0x%p, new csk 0x%p.\n", cdev, csk); + + return csk; +} + +static struct rtable *find_route_ipv4(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport, u8 tos) +{ + struct rtable *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = saddr, + .tos = tos } + }, + .proto = IPPROTO_TCP, + .uli_u = { + .ports = { + .sport = sport, + .dport = dport } + } + }; + + if (ip_route_output_flow(&init_net, &rt, &fl, NULL, 0)) + return NULL; + + return rt; +} + +static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) +{ + struct sockaddr_in *daddr = (struct sockaddr_in *)dst_addr; + struct dst_entry *dst; + struct net_device *ndev; + struct cxgbi_device *cdev; + struct rtable *rt = NULL; + struct cxgbi_sock *csk = NULL; + unsigned int mtu = 0; + int port = 0xFFFF; + int err = 0; + + if (daddr->sin_family != AF_INET) { + pr_info("address family 0x%x NOT supported.\n", + daddr->sin_family); + err = -EAFNOSUPPORT; + goto err_out; + } + + rt = find_route_ipv4(0, daddr->sin_addr.s_addr, 0, daddr->sin_port, 0); + if (!rt) { + pr_info("no route to ipv4 0x%x, port %u.\n", + daddr->sin_addr.s_addr, daddr->sin_port); + err = -ENETUNREACH; + goto err_out; + } + dst = &rt->dst; + ndev = dst->neighbour->dev; + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + pr_info("multi-cast route %pI4, port %u, dev %s.\n", + &daddr->sin_addr.s_addr, ntohs(daddr->sin_port), + ndev->name); + err = -ENETUNREACH; + goto rel_rt; + } + + if (ndev->flags & IFF_LOOPBACK) { + ndev = ip_dev_find(&init_net, daddr->sin_addr.s_addr); + mtu = ndev->mtu; + pr_info("rt dev %s, loopback -> %s, mtu %u.\n", + dst->neighbour->dev->name, ndev->name, mtu); + } + + if (ndev->priv_flags & IFF_802_1Q_VLAN) { + ndev = vlan_dev_real_dev(ndev); + pr_info("rt dev %s, vlan -> %s.\n", + dst->neighbour->dev->name, ndev->name); + } + + cdev = cxgbi_device_find_by_netdev(ndev, &port); + if (!cdev) { + pr_info("dst %pI4, %s, NOT cxgbi device.\n", + &daddr->sin_addr.s_addr, ndev->name); + err = -ENETUNREACH; + goto rel_rt; + } + log_debug(1 << CXGBI_DBG_SOCK, + "route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n", + &daddr->sin_addr.s_addr, ntohs(daddr->sin_port), + port, ndev->name, cdev); + + csk = cxgbi_sock_create(cdev); + if (!csk) { + err = -ENOMEM; + goto rel_rt; + } + csk->cdev = cdev; + csk->port_id = port; + csk->mtu = mtu; + csk->dst = dst; + csk->daddr.sin_addr.s_addr = daddr->sin_addr.s_addr; + csk->daddr.sin_port = daddr->sin_port; + if (cdev->hbas[port]->ipv4addr) + csk->saddr.sin_addr.s_addr = cdev->hbas[port]->ipv4addr; + else + csk->saddr.sin_addr.s_addr = rt->rt_src; + + return csk; + +rel_rt: + ip_rt_put(rt); + if (csk) + cxgbi_sock_closed(csk); +err_out: + return ERR_PTR(err); +} + +void cxgbi_sock_established(struct cxgbi_sock *csk, unsigned int snd_isn, + unsigned int opt) +{ + csk->write_seq = csk->snd_nxt = csk->snd_una = snd_isn; + dst_confirm(csk->dst); + smp_mb(); + cxgbi_sock_set_state(csk, CTP_ESTABLISHED); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_established); + +static void cxgbi_inform_iscsi_conn_closing(struct cxgbi_sock *csk) +{ + log_debug(1 << CXGBI_DBG_SOCK, + "csk 0x%p, state %u, flags 0x%lx, conn 0x%p.\n", + csk, csk->state, csk->flags, csk->user_data); + + if (csk->state != CTP_ESTABLISHED) { + read_lock(&csk->callback_lock); + if (csk->user_data) + iscsi_conn_failure(csk->user_data, + ISCSI_ERR_CONN_FAILED); + read_unlock(&csk->callback_lock); + } +} + +void cxgbi_sock_closed(struct cxgbi_sock *csk) +{ + log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n", + csk, (csk)->state, (csk)->flags, (csk)->tid); + cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED); + if (csk->state == CTP_ACTIVE_OPEN || csk->state == CTP_CLOSED) + return; + if (csk->saddr.sin_port) + sock_put_port(csk); + if (csk->dst) + dst_release(csk->dst); + csk->cdev->csk_release_offload_resources(csk); + cxgbi_sock_set_state(csk, CTP_CLOSED); + cxgbi_inform_iscsi_conn_closing(csk); + cxgbi_sock_put(csk); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_closed); + +static void need_active_close(struct cxgbi_sock *csk) +{ + int data_lost; + int close_req = 0; + + log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n", + csk, (csk)->state, (csk)->flags, (csk)->tid); + spin_lock_bh(&csk->lock); + dst_confirm(csk->dst); + data_lost = skb_queue_len(&csk->receive_queue); + __skb_queue_purge(&csk->receive_queue); + + if (csk->state == CTP_ACTIVE_OPEN) + cxgbi_sock_set_flag(csk, CTPF_ACTIVE_CLOSE_NEEDED); + else if (csk->state == CTP_ESTABLISHED) { + close_req = 1; + cxgbi_sock_set_state(csk, CTP_ACTIVE_CLOSE); + } else if (csk->state == CTP_PASSIVE_CLOSE) { + close_req = 1; + cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2); + } + + if (close_req) { + if (data_lost) + csk->cdev->csk_send_abort_req(csk); + else + csk->cdev->csk_send_close_req(csk); + } + + spin_unlock_bh(&csk->lock); +} + +void cxgbi_sock_fail_act_open(struct cxgbi_sock *csk, int errno) +{ + pr_info("csk 0x%p,%u,%lx, %pI4:%u-%pI4:%u, err %d.\n", + csk, csk->state, csk->flags, + &csk->saddr.sin_addr.s_addr, csk->saddr.sin_port, + &csk->daddr.sin_addr.s_addr, csk->daddr.sin_port, + errno); + + cxgbi_sock_set_state(csk, CTP_CONNECTING); + csk->err = errno; + cxgbi_sock_closed(csk); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_fail_act_open); + +void cxgbi_sock_act_open_req_arp_failure(void *handle, struct sk_buff *skb) +{ + struct cxgbi_sock *csk = (struct cxgbi_sock *)skb->sk; + + log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n", + csk, (csk)->state, (csk)->flags, (csk)->tid); + cxgbi_sock_get(csk); + spin_lock_bh(&csk->lock); + if (csk->state == CTP_ACTIVE_OPEN) + cxgbi_sock_fail_act_open(csk, -EHOSTUNREACH); + spin_unlock_bh(&csk->lock); + cxgbi_sock_put(csk); + __kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_act_open_req_arp_failure); + +void cxgbi_sock_rcv_abort_rpl(struct cxgbi_sock *csk) +{ + cxgbi_sock_get(csk); + spin_lock_bh(&csk->lock); + if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) { + if (!cxgbi_sock_flag(csk, CTPF_ABORT_RPL_RCVD)) + cxgbi_sock_set_flag(csk, CTPF_ABORT_RPL_RCVD); + else { + cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_RCVD); + cxgbi_sock_clear_flag(csk, CTPF_ABORT_RPL_PENDING); + if (cxgbi_sock_flag(csk, CTPF_ABORT_REQ_RCVD)) + pr_err("csk 0x%p,%u,0x%lx,%u,ABT_RPL_RSS.\n", + csk, csk->state, csk->flags, csk->tid); + cxgbi_sock_closed(csk); + } + } + spin_unlock_bh(&csk->lock); + cxgbi_sock_put(csk); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_abort_rpl); + +void cxgbi_sock_rcv_peer_close(struct cxgbi_sock *csk) +{ + log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n", + csk, (csk)->state, (csk)->flags, (csk)->tid); + cxgbi_sock_get(csk); + spin_lock_bh(&csk->lock); + + if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) + goto done; + + switch (csk->state) { + case CTP_ESTABLISHED: + cxgbi_sock_set_state(csk, CTP_PASSIVE_CLOSE); + break; + case CTP_ACTIVE_CLOSE: + cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_2); + break; + case CTP_CLOSE_WAIT_1: + cxgbi_sock_closed(csk); + break; + case CTP_ABORTING: + break; + default: + pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n", + csk, csk->state, csk->flags, csk->tid); + } + cxgbi_inform_iscsi_conn_closing(csk); +done: + spin_unlock_bh(&csk->lock); + cxgbi_sock_put(csk); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_peer_close); + +void cxgbi_sock_rcv_close_conn_rpl(struct cxgbi_sock *csk, u32 snd_nxt) +{ + log_debug(1 << CXGBI_DBG_SOCK, "csk 0x%p,%u,0x%lx,%u.\n", + csk, (csk)->state, (csk)->flags, (csk)->tid); + cxgbi_sock_get(csk); + spin_lock_bh(&csk->lock); + + csk->snd_una = snd_nxt - 1; + if (cxgbi_sock_flag(csk, CTPF_ABORT_RPL_PENDING)) + goto done; + + switch (csk->state) { + case CTP_ACTIVE_CLOSE: + cxgbi_sock_set_state(csk, CTP_CLOSE_WAIT_1); + break; + case CTP_CLOSE_WAIT_1: + case CTP_CLOSE_WAIT_2: + cxgbi_sock_closed(csk); + break; + case CTP_ABORTING: + break; + default: + pr_err("csk 0x%p,%u,0x%lx,%u, bad state.\n", + csk, csk->state, csk->flags, csk->tid); + } +done: + spin_unlock_bh(&csk->lock); + cxgbi_sock_put(csk); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_close_conn_rpl); + +void cxgbi_sock_rcv_wr_ack(struct cxgbi_sock *csk, unsigned int credits, + unsigned int snd_una, int seq_chk) +{ + log_debug(1 << CXGBI_DBG_TOE | 1 << CXGBI_DBG_SOCK, + "csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, snd_una %u,%d.\n", + csk, csk->state, csk->flags, csk->tid, credits, + csk->wr_cred, csk->wr_una_cred, snd_una, seq_chk); + + spin_lock_bh(&csk->lock); + + csk->wr_cred += credits; + if (csk->wr_una_cred > csk->wr_max_cred - csk->wr_cred) + csk->wr_una_cred = csk->wr_max_cred - csk->wr_cred; + + while (credits) { + struct sk_buff *p = cxgbi_sock_peek_wr(csk); + + if (unlikely(!p)) { + pr_err("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, empty.\n", + csk, csk->state, csk->flags, csk->tid, credits, + csk->wr_cred, csk->wr_una_cred); + break; + } + + if (unlikely(credits < p->csum)) { + pr_warn("csk 0x%p,%u,0x%lx,%u, cr %u,%u+%u, < %u.\n", + csk, csk->state, csk->flags, csk->tid, + credits, csk->wr_cred, csk->wr_una_cred, + p->csum); + p->csum -= credits; + break; + } else { + cxgbi_sock_dequeue_wr(csk); + credits -= p->csum; + kfree_skb(p); + } + } + + cxgbi_sock_check_wr_invariants(csk); + + if (seq_chk) { + if (unlikely(before(snd_una, csk->snd_una))) { + pr_warn("csk 0x%p,%u,0x%lx,%u, snd_una %u/%u.", + csk, csk->state, csk->flags, csk->tid, snd_una, + csk->snd_una); + goto done; + } + + if (csk->snd_una != snd_una) { + csk->snd_una = snd_una; + dst_confirm(csk->dst); + } + } + + if (skb_queue_len(&csk->write_queue)) { + if (csk->cdev->csk_push_tx_frames(csk, 0)) + cxgbi_conn_tx_open(csk); + } else + cxgbi_conn_tx_open(csk); +done: + spin_unlock_bh(&csk->lock); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_rcv_wr_ack); + +static unsigned int cxgbi_sock_find_best_mtu(struct cxgbi_sock *csk, + unsigned short mtu) +{ + int i = 0; + + while (i < csk->cdev->nmtus - 1 && csk->cdev->mtus[i + 1] <= mtu) + ++i; + + return i; +} + +unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu) +{ + unsigned int idx; + struct dst_entry *dst = csk->dst; + + csk->advmss = dst_metric(dst, RTAX_ADVMSS); + + if (csk->advmss > pmtu - 40) + csk->advmss = pmtu - 40; + if (csk->advmss < csk->cdev->mtus[0] - 40) + csk->advmss = csk->cdev->mtus[0] - 40; + idx = cxgbi_sock_find_best_mtu(csk, csk->advmss + 40); + + return idx; +} +EXPORT_SYMBOL_GPL(cxgbi_sock_select_mss); + +void cxgbi_sock_skb_entail(struct cxgbi_sock *csk, struct sk_buff *skb) +{ + cxgbi_skcb_tcp_seq(skb) = csk->write_seq; + __skb_queue_tail(&csk->write_queue, skb); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_skb_entail); + +void cxgbi_sock_purge_wr_queue(struct cxgbi_sock *csk) +{ + struct sk_buff *skb; + + while ((skb = cxgbi_sock_dequeue_wr(csk)) != NULL) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_purge_wr_queue); + +void cxgbi_sock_check_wr_invariants(const struct cxgbi_sock *csk) +{ + int pending = cxgbi_sock_count_pending_wrs(csk); + + if (unlikely(csk->wr_cred + pending != csk->wr_max_cred)) + pr_err("csk 0x%p, tid %u, credit %u + %u != %u.\n", + csk, csk->tid, csk->wr_cred, pending, csk->wr_max_cred); +} +EXPORT_SYMBOL_GPL(cxgbi_sock_check_wr_invariants); + +static int cxgbi_sock_send_pdus(struct cxgbi_sock *csk, struct sk_buff *skb) +{ + struct cxgbi_device *cdev = csk->cdev; + struct sk_buff *next; + int err, copied = 0; + + spin_lock_bh(&csk->lock); + + if (csk->state != CTP_ESTABLISHED) { + log_debug(1 << CXGBI_DBG_PDU_TX, + "csk 0x%p,%u,0x%lx,%u, EAGAIN.\n", + csk, csk->state, csk->flags, csk->tid); + err = -EAGAIN; + goto out_err; + } + + if (csk->err) { + log_debug(1 << CXGBI_DBG_PDU_TX, + "csk 0x%p,%u,0x%lx,%u, EPIPE %d.\n", + csk, csk->state, csk->flags, csk->tid, csk->err); + err = -EPIPE; + goto out_err; + } + + if (csk->write_seq - csk->snd_una >= cdev->snd_win) { + log_debug(1 << CXGBI_DBG_PDU_TX, + "csk 0x%p,%u,0x%lx,%u, FULL %u-%u >= %u.\n", + csk, csk->state, csk->flags, csk->tid, csk->write_seq, + csk->snd_una, cdev->snd_win); + err = -ENOBUFS; + goto out_err; + } + + while (skb) { + int frags = skb_shinfo(skb)->nr_frags + + (skb->len != skb->data_len); + + if (unlikely(skb_headroom(skb) < cdev->skb_tx_rsvd)) { + pr_err("csk 0x%p, skb head %u < %u.\n", + csk, skb_headroom(skb), cdev->skb_tx_rsvd); + err = -EINVAL; + goto out_err; + } + + if (frags >= SKB_WR_LIST_SIZE) { + pr_err("csk 0x%p, frags %d, %u,%u >%u.\n", + csk, skb_shinfo(skb)->nr_frags, skb->len, + skb->data_len, (uint)(SKB_WR_LIST_SIZE)); + err = -EINVAL; + goto out_err; + } + + next = skb->next; + skb->next = NULL; + cxgbi_skcb_set_flag(skb, SKCBF_TX_NEED_HDR); + cxgbi_sock_skb_entail(csk, skb); + copied += skb->len; + csk->write_seq += skb->len + + cxgbi_ulp_extra_len(cxgbi_skcb_ulp_mode(skb)); + skb = next; + } +done: + if (likely(skb_queue_len(&csk->write_queue))) + cdev->csk_push_tx_frames(csk, 1); + spin_unlock_bh(&csk->lock); + return copied; + +out_err: + if (copied == 0 && err == -EPIPE) + copied = csk->err ? csk->err : -EPIPE; + else + copied = err; + goto done; +} + +/* + * Direct Data Placement - + * Directly place the iSCSI Data-In or Data-Out PDU's payload into pre-posted + * final destination host-memory buffers based on the Initiator Task Tag (ITT) + * in Data-In or Target Task Tag (TTT) in Data-Out PDUs. + * The host memory address is programmed into h/w in the format of pagepod + * entries. + * The location of the pagepod entry is encoded into ddp tag which is used as + * the base for ITT/TTT. + */ + +static unsigned char ddp_page_order[DDP_PGIDX_MAX] = {0, 1, 2, 4}; +static unsigned char ddp_page_shift[DDP_PGIDX_MAX] = {12, 13, 14, 16}; +static unsigned char page_idx = DDP_PGIDX_MAX; + +static unsigned char sw_tag_idx_bits; +static unsigned char sw_tag_age_bits; + +/* + * Direct-Data Placement page size adjustment + */ +static int ddp_adjust_page_table(void) +{ + int i; + unsigned int base_order, order; + + if (PAGE_SIZE < (1UL << ddp_page_shift[0])) { + pr_info("PAGE_SIZE 0x%lx too small, min 0x%lx\n", + PAGE_SIZE, 1UL << ddp_page_shift[0]); + return -EINVAL; + } + + base_order = get_order(1UL << ddp_page_shift[0]); + order = get_order(1UL << PAGE_SHIFT); + + for (i = 0; i < DDP_PGIDX_MAX; i++) { + /* first is the kernel page size, then just doubling */ + ddp_page_order[i] = order - base_order + i; + ddp_page_shift[i] = PAGE_SHIFT + i; + } + return 0; +} + +static int ddp_find_page_index(unsigned long pgsz) +{ + int i; + + for (i = 0; i < DDP_PGIDX_MAX; i++) { + if (pgsz == (1UL << ddp_page_shift[i])) + return i; + } + pr_info("ddp page size %lu not supported.\n", pgsz); + return DDP_PGIDX_MAX; +} + +static void ddp_setup_host_page_size(void) +{ + if (page_idx == DDP_PGIDX_MAX) { + page_idx = ddp_find_page_index(PAGE_SIZE); + + if (page_idx == DDP_PGIDX_MAX) { + pr_info("system PAGE %lu, update hw.\n", PAGE_SIZE); + if (ddp_adjust_page_table() < 0) { + pr_info("PAGE %lu, disable ddp.\n", PAGE_SIZE); + return; + } + page_idx = ddp_find_page_index(PAGE_SIZE); + } + pr_info("system PAGE %lu, ddp idx %u.\n", PAGE_SIZE, page_idx); + } +} + +void cxgbi_ddp_page_size_factor(int *pgsz_factor) +{ + int i; + + for (i = 0; i < DDP_PGIDX_MAX; i++) + pgsz_factor[i] = ddp_page_order[i]; +} +EXPORT_SYMBOL_GPL(cxgbi_ddp_page_size_factor); + +/* + * DDP setup & teardown + */ + +void cxgbi_ddp_ppod_set(struct cxgbi_pagepod *ppod, + struct cxgbi_pagepod_hdr *hdr, + struct cxgbi_gather_list *gl, unsigned int gidx) +{ + int i; + + memcpy(ppod, hdr, sizeof(*hdr)); + for (i = 0; i < (PPOD_PAGES_MAX + 1); i++, gidx++) { + ppod->addr[i] = gidx < gl->nelem ? + cpu_to_be64(gl->phys_addr[gidx]) : 0ULL; + } +} +EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_set); + +void cxgbi_ddp_ppod_clear(struct cxgbi_pagepod *ppod) +{ + memset(ppod, 0, sizeof(*ppod)); +} +EXPORT_SYMBOL_GPL(cxgbi_ddp_ppod_clear); + +static inline int ddp_find_unused_entries(struct cxgbi_ddp_info *ddp, + unsigned int start, unsigned int max, + unsigned int count, + struct cxgbi_gather_list *gl) +{ + unsigned int i, j, k; + + /* not enough entries */ + if ((max - start) < count) { + log_debug(1 << CXGBI_DBG_DDP, + "NOT enough entries %u+%u < %u.\n", start, count, max); + return -EBUSY; + } + + max -= count; + spin_lock(&ddp->map_lock); + for (i = start; i < max;) { + for (j = 0, k = i; j < count; j++, k++) { + if (ddp->gl_map[k]) + break; + } + if (j == count) { + for (j = 0, k = i; j < count; j++, k++) + ddp->gl_map[k] = gl; + spin_unlock(&ddp->map_lock); + return i; + } + i += j + 1; + } + spin_unlock(&ddp->map_lock); + log_debug(1 << CXGBI_DBG_DDP, + "NO suitable entries %u available.\n", count); + return -EBUSY; +} + +static inline void ddp_unmark_entries(struct cxgbi_ddp_info *ddp, + int start, int count) +{ + spin_lock(&ddp->map_lock); + memset(&ddp->gl_map[start], 0, + count * sizeof(struct cxgbi_gather_list *)); + spin_unlock(&ddp->map_lock); +} + +static inline void ddp_gl_unmap(struct pci_dev *pdev, + struct cxgbi_gather_list *gl) +{ + int i; + + for (i = 0; i < gl->nelem; i++) + dma_unmap_page(&pdev->dev, gl->phys_addr[i], PAGE_SIZE, + PCI_DMA_FROMDEVICE); +} + +static inline int ddp_gl_map(struct pci_dev *pdev, + struct cxgbi_gather_list *gl) +{ + int i; + + for (i = 0; i < gl->nelem; i++) { + gl->phys_addr[i] = dma_map_page(&pdev->dev, gl->pages[i], 0, + PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(&pdev->dev, gl->phys_addr[i]))) { + log_debug(1 << CXGBI_DBG_DDP, + "page %d 0x%p, 0x%p dma mapping err.\n", + i, gl->pages[i], pdev); + goto unmap; + } + } + return i; +unmap: + if (i) { + unsigned int nelem = gl->nelem; + + gl->nelem = i; + ddp_gl_unmap(pdev, gl); + gl->nelem = nelem; + } + return -EINVAL; +} + +static void ddp_release_gl(struct cxgbi_gather_list *gl, + struct pci_dev *pdev) +{ + ddp_gl_unmap(pdev, gl); + kfree(gl); +} + +static struct cxgbi_gather_list *ddp_make_gl(unsigned int xferlen, + struct scatterlist *sgl, + unsigned int sgcnt, + struct pci_dev *pdev, + gfp_t gfp) +{ + struct cxgbi_gather_list *gl; + struct scatterlist *sg = sgl; |