diff options
Diffstat (limited to 'drivers/infiniband/hw/nes/nes_cm.c')
-rw-r--r-- | drivers/infiniband/hw/nes/nes_cm.c | 3088 |
1 files changed, 3088 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c new file mode 100644 index 00000000000..bd5cfeaac20 --- /dev/null +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -0,0 +1,3088 @@ +/* + * Copyright (c) 2006 - 2008 NetEffect, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + + +#define TCPOPT_TIMESTAMP 8 + +#include <asm/atomic.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/init.h> +#include <linux/if_arp.h> +#include <linux/notifier.h> +#include <linux/net.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/time.h> +#include <linux/delay.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/random.h> +#include <linux/list.h> +#include <linux/threads.h> + +#include <net/neighbour.h> +#include <net/route.h> +#include <net/ip_fib.h> + +#include "nes.h" + +u32 cm_packets_sent; +u32 cm_packets_bounced; +u32 cm_packets_dropped; +u32 cm_packets_retrans; +u32 cm_packets_created; +u32 cm_packets_received; +u32 cm_listens_created; +u32 cm_listens_destroyed; +u32 cm_backlog_drops; +atomic_t cm_loopbacks; +atomic_t cm_nodes_created; +atomic_t cm_nodes_destroyed; +atomic_t cm_accel_dropped_pkts; +atomic_t cm_resets_recvd; + +static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); +static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, + struct nes_vnic *, struct nes_cm_info *); +static int add_ref_cm_node(struct nes_cm_node *); +static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); +static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); + + +/* External CM API Interface */ +/* instance of function pointers for client API */ +/* set address of this instance to cm_core->cm_ops at cm_core alloc */ +static struct nes_cm_ops nes_cm_api = { + mini_cm_accelerated, + mini_cm_listen, + mini_cm_del_listen, + mini_cm_connect, + mini_cm_close, + mini_cm_accept, + mini_cm_reject, + mini_cm_recv_pkt, + mini_cm_dealloc_core, + mini_cm_get, + mini_cm_set +}; + +struct nes_cm_core *g_cm_core; + +atomic_t cm_connects; +atomic_t cm_accepts; +atomic_t cm_disconnects; +atomic_t cm_closes; +atomic_t cm_connecteds; +atomic_t cm_connect_reqs; +atomic_t cm_rejects; + + +/** + * create_event + */ +static struct nes_cm_event *create_event(struct nes_cm_node *cm_node, + enum nes_cm_event_type type) +{ + struct nes_cm_event *event; + + if (!cm_node->cm_id) + return NULL; + + /* allocate an empty event */ + event = kzalloc(sizeof(*event), GFP_ATOMIC); + + if (!event) + return NULL; + + event->type = type; + event->cm_node = cm_node; + event->cm_info.rem_addr = cm_node->rem_addr; + event->cm_info.loc_addr = cm_node->loc_addr; + event->cm_info.rem_port = cm_node->rem_port; + event->cm_info.loc_port = cm_node->loc_port; + event->cm_info.cm_id = cm_node->cm_id; + + nes_debug(NES_DBG_CM, "Created event=%p, type=%u, dst_addr=%08x[%x]," + " src_addr=%08x[%x]\n", + event, type, + event->cm_info.loc_addr, event->cm_info.loc_port, + event->cm_info.rem_addr, event->cm_info.rem_port); + + nes_cm_post_event(event); + return event; +} + + +/** + * send_mpa_request + */ +int send_mpa_request(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb; + int ret; + + skb = get_free_pkt(cm_node); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + /* send an MPA Request frame */ + form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame, + cm_node->mpa_frame_size, SET_ACK); + + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + if (ret < 0) { + return ret; + } + + return 0; +} + + +/** + * recv_mpa - process a received TCP pkt, we are expecting an + * IETF MPA frame + */ +static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 len) +{ + struct ietf_mpa_frame *mpa_frame; + + /* assume req frame is in tcp data payload */ + if (len < sizeof(struct ietf_mpa_frame)) { + nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); + return -1; + } + + mpa_frame = (struct ietf_mpa_frame *)buffer; + cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len); + + if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) { + nes_debug(NES_DBG_CM, "The received ietf buffer was not right" + " complete (%x + %x != %x)\n", + cm_node->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len); + return -1; + } + + /* copy entire MPA frame to our cm_node's frame */ + memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame), + cm_node->mpa_frame_size); + + return 0; +} + + +/** + * handle_exception_pkt - process an exception packet. + * We have been in a TSA state, and we have now received SW + * TCP/IP traffic should be a FIN request or IP pkt with options + */ +static int handle_exception_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret = 0; + struct tcphdr *tcph = tcp_hdr(skb); + + /* first check to see if this a FIN pkt */ + if (tcph->fin) { + /* we need to ACK the FIN request */ + send_ack(cm_node); + + /* check which side we are (client/server) and set next state accordingly */ + if (cm_node->tcp_cntxt.client) + cm_node->state = NES_CM_STATE_CLOSING; + else { + /* we are the server side */ + cm_node->state = NES_CM_STATE_CLOSE_WAIT; + /* since this is a self contained CM we don't wait for */ + /* an APP to close us, just send final FIN immediately */ + ret = send_fin(cm_node, NULL); + cm_node->state = NES_CM_STATE_LAST_ACK; + } + } else { + ret = -EINVAL; + } + + return ret; +} + + +/** + * form_cm_frame - get a free packet and build empty frame Use + * node info to build. + */ +struct sk_buff *form_cm_frame(struct sk_buff *skb, struct nes_cm_node *cm_node, + void *options, u32 optionsize, void *data, u32 datasize, u8 flags) +{ + struct tcphdr *tcph; + struct iphdr *iph; + struct ethhdr *ethh; + u8 *buf; + u16 packetsize = sizeof(*iph); + + packetsize += sizeof(*tcph); + packetsize += optionsize + datasize; + + memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); + + skb->len = 0; + buf = skb_put(skb, packetsize + ETH_HLEN); + + ethh = (struct ethhdr *) buf; + buf += ETH_HLEN; + + iph = (struct iphdr *)buf; + buf += sizeof(*iph); + tcph = (struct tcphdr *)buf; + skb_reset_mac_header(skb); + skb_set_network_header(skb, ETH_HLEN); + skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph)); + buf += sizeof(*tcph); + + skb->ip_summed = CHECKSUM_PARTIAL; + skb->protocol = htons(0x800); + skb->data_len = 0; + skb->mac_len = ETH_HLEN; + + memcpy(ethh->h_dest, cm_node->rem_mac, ETH_ALEN); + memcpy(ethh->h_source, cm_node->loc_mac, ETH_ALEN); + ethh->h_proto = htons(0x0800); + + iph->version = IPVERSION; + iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ + iph->tos = 0; + iph->tot_len = htons(packetsize); + iph->id = htons(++cm_node->tcp_cntxt.loc_id); + + iph->frag_off = htons(0x4000); + iph->ttl = 0x40; + iph->protocol = 0x06; /* IPPROTO_TCP */ + + iph->saddr = htonl(cm_node->loc_addr); + iph->daddr = htonl(cm_node->rem_addr); + + tcph->source = htons(cm_node->loc_port); + tcph->dest = htons(cm_node->rem_port); + tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num); + + if (flags & SET_ACK) { + cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; + tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); + tcph->ack = 1; + } else + tcph->ack_seq = 0; + + if (flags & SET_SYN) { + cm_node->tcp_cntxt.loc_seq_num++; + tcph->syn = 1; + } else + cm_node->tcp_cntxt.loc_seq_num += datasize; /* data (no headers) */ + + if (flags & SET_FIN) + tcph->fin = 1; + + if (flags & SET_RST) + tcph->rst = 1; + + tcph->doff = (u16)((sizeof(*tcph) + optionsize + 3) >> 2); + tcph->window = htons(cm_node->tcp_cntxt.rcv_wnd); + tcph->urg_ptr = 0; + if (optionsize) + memcpy(buf, options, optionsize); + buf += optionsize; + if (datasize) + memcpy(buf, data, datasize); + + skb_shinfo(skb)->nr_frags = 0; + cm_packets_created++; + + return skb; +} + + +/** + * print_core - dump a cm core + */ +static void print_core(struct nes_cm_core *core) +{ + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + nes_debug(NES_DBG_CM, "CM Core -- (core = %p )\n", core); + if (!core) + return; + nes_debug(NES_DBG_CM, "---------------------------------------------\n"); + nes_debug(NES_DBG_CM, "Session ID : %u \n", atomic_read(&core->session_id)); + + nes_debug(NES_DBG_CM, "State : %u \n", core->state); + + nes_debug(NES_DBG_CM, "Tx Free cnt : %u \n", skb_queue_len(&core->tx_free_list)); + nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); + nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); + + nes_debug(NES_DBG_CM, "core : %p \n", core); + + nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); +} + + +/** + * schedule_nes_timer + * note - cm_node needs to be protected before calling this. Encase in: + * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); + */ +int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, + enum nes_timer_type type, int send_retrans, + int close_when_complete) +{ + unsigned long flags; + struct nes_cm_core *cm_core; + struct nes_timer_entry *new_send; + int ret = 0; + u32 was_timer_set; + + new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); + if (!new_send) + return -1; + if (!cm_node) + return -EINVAL; + + /* new_send->timetosend = currenttime */ + new_send->retrycount = NES_DEFAULT_RETRYS; + new_send->retranscount = NES_DEFAULT_RETRANS; + new_send->skb = skb; + new_send->timetosend = jiffies; + new_send->type = type; + new_send->netdev = cm_node->netdev; + new_send->send_retrans = send_retrans; + new_send->close_when_complete = close_when_complete; + + if (type == NES_TIMER_TYPE_CLOSE) { + new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */ + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->recv_list); + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + } + + if (type == NES_TIMER_TYPE_SEND) { + new_send->seq_num = htonl(tcp_hdr(skb)->seq); + atomic_inc(&new_send->skb->users); + + ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); + if (ret != NETDEV_TX_OK) { + nes_debug(NES_DBG_CM, "Error sending packet %p (jiffies = %lu)\n", + new_send, jiffies); + atomic_dec(&new_send->skb->users); + new_send->timetosend = jiffies; + } else { + cm_packets_sent++; + if (!send_retrans) { + if (close_when_complete) + rem_ref_cm_node(cm_node->cm_core, cm_node); + dev_kfree_skb_any(new_send->skb); + kfree(new_send); + return ret; + } + new_send->timetosend = jiffies + NES_RETRY_TIMEOUT; + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->retrans_list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + } + if (type == NES_TIMER_TYPE_RECV) { + new_send->seq_num = htonl(tcp_hdr(skb)->seq); + new_send->timetosend = jiffies; + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_add_tail(&new_send->list, &cm_node->recv_list); + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + } + cm_core = cm_node->cm_core; + + was_timer_set = timer_pending(&cm_core->tcp_timer); + + if (!was_timer_set) { + cm_core->tcp_timer.expires = new_send->timetosend; + add_timer(&cm_core->tcp_timer); + } + + return ret; +} + + +/** + * nes_cm_timer_tick + */ +void nes_cm_timer_tick(unsigned long pass) +{ + unsigned long flags, qplockflags; + unsigned long nexttimeout = jiffies + NES_LONG_TIME; + struct iw_cm_id *cm_id; + struct nes_cm_node *cm_node; + struct nes_timer_entry *send_entry, *recv_entry; + struct list_head *list_core, *list_core_temp; + struct list_head *list_node, *list_node_temp; + struct nes_cm_core *cm_core = g_cm_core; + struct nes_qp *nesqp; + struct sk_buff *skb; + u32 settimer = 0; + int ret = NETDEV_TX_OK; + int node_done; + + spin_lock_irqsave(&cm_core->ht_lock, flags); + + list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) { + cm_node = container_of(list_node, struct nes_cm_node, list); + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + list_for_each_safe(list_core, list_node_temp, &cm_node->recv_list) { + recv_entry = container_of(list_core, struct nes_timer_entry, list); + if ((time_after(recv_entry->timetosend, jiffies)) && + (recv_entry->type == NES_TIMER_TYPE_CLOSE)) { + if (nexttimeout > recv_entry->timetosend || !settimer) { + nexttimeout = recv_entry->timetosend; + settimer = 1; + } + continue; + } + list_del(&recv_entry->list); + cm_id = cm_node->cm_id; + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + if (recv_entry->type == NES_TIMER_TYPE_CLOSE) { + nesqp = (struct nes_qp *)recv_entry->skb; + spin_lock_irqsave(&nesqp->lock, qplockflags); + if (nesqp->cm_id) { + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d: " + "****** HIT A NES_TIMER_TYPE_CLOSE" + " with something to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; + nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; + nesqp->ibqp_state = IB_QPS_ERR; + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_cm_disconn(nesqp); + } else { + spin_unlock_irqrestore(&nesqp->lock, qplockflags); + nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, refcount = %d:" + " ****** HIT A NES_TIMER_TYPE_CLOSE" + " with nothing to do!!! ******\n", + nesqp->hwqp.qp_id, cm_id, + atomic_read(&nesqp->refcount)); + nes_rem_ref(&nesqp->ibqp); + } + if (cm_id) + cm_id->rem_ref(cm_id); + } + kfree(recv_entry); + spin_lock_irqsave(&cm_node->recv_list_lock, flags); + } + spin_unlock_irqrestore(&cm_node->recv_list_lock, flags); + + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + node_done = 0; + list_for_each_safe(list_core, list_node_temp, &cm_node->retrans_list) { + if (node_done) { + break; + } + send_entry = container_of(list_core, struct nes_timer_entry, list); + if (time_after(send_entry->timetosend, jiffies)) { + if (cm_node->state != NES_CM_STATE_TSA) { + if ((nexttimeout > send_entry->timetosend) || !settimer) { + nexttimeout = send_entry->timetosend; + settimer = 1; + } + node_done = 1; + continue; + } else { + list_del(&send_entry->list); + skb = send_entry->skb; + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + dev_kfree_skb_any(skb); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + } + if (send_entry->type == NES_TIMER_NODE_CLEANUP) { + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + if ((send_entry->seq_num < cm_node->tcp_cntxt.rem_ack_num) || + (cm_node->state == NES_CM_STATE_TSA) || + (cm_node->state == NES_CM_STATE_CLOSED)) { + skb = send_entry->skb; + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + kfree(send_entry); + dev_kfree_skb_any(skb); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + + if (!send_entry->retranscount || !send_entry->retrycount) { + cm_packets_dropped++; + skb = send_entry->skb; + list_del(&send_entry->list); + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + dev_kfree_skb_any(skb); + kfree(send_entry); + if (cm_node->state == NES_CM_STATE_SYN_RCVD) { + /* this node never even generated an indication up to the cm */ + rem_ref_cm_node(cm_core, cm_node); + } else { + cm_node->state = NES_CM_STATE_CLOSED; + create_event(cm_node, NES_CM_EVENT_ABORTED); + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + /* this seems like the correct place, but leave send entry unprotected */ + // spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + atomic_inc(&send_entry->skb->users); + cm_packets_retrans++; + nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p," + " jiffies = %lu, time to send = %lu, retranscount = %u, " + "send_entry->seq_num = 0x%08X, cm_node->tcp_cntxt.rem_ack_num = 0x%08X\n", + send_entry, cm_node, jiffies, send_entry->timetosend, send_entry->retranscount, + send_entry->seq_num, cm_node->tcp_cntxt.rem_ack_num); + + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); + if (ret != NETDEV_TX_OK) { + cm_packets_bounced++; + atomic_dec(&send_entry->skb->users); + send_entry->retrycount--; + nexttimeout = jiffies + NES_SHORT_TIME; + settimer = 1; + node_done = 1; + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } else { + cm_packets_sent++; + } + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + list_del(&send_entry->list); + nes_debug(NES_DBG_CM, "Packet Sent: retrans count = %u, retry count = %u.\n", + send_entry->retranscount, send_entry->retrycount); + if (send_entry->send_retrans) { + send_entry->retranscount--; + send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT; + if (nexttimeout > send_entry->timetosend || !settimer) { + nexttimeout = send_entry->timetosend; + settimer = 1; + } + list_add(&send_entry->list, &cm_node->retrans_list); + continue; + } else { + int close_when_complete; + skb = send_entry->skb; + close_when_complete = send_entry->close_when_complete; + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + if (close_when_complete) { + BUG_ON(atomic_read(&cm_node->ref_count) == 1); + rem_ref_cm_node(cm_core, cm_node); + } + dev_kfree_skb_any(skb); + kfree(send_entry); + spin_lock_irqsave(&cm_node->retrans_list_lock, flags); + continue; + } + } + spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); + + rem_ref_cm_node(cm_core, cm_node); + + spin_lock_irqsave(&cm_core->ht_lock, flags); + if (ret != NETDEV_TX_OK) + break; + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + if (settimer) { + if (!timer_pending(&cm_core->tcp_timer)) { + cm_core->tcp_timer.expires = nexttimeout; + add_timer(&cm_core->tcp_timer); + } + } +} + + +/** + * send_syn + */ +int send_syn(struct nes_cm_node *cm_node, u32 sendack) +{ + int ret; + int flags = SET_SYN; + struct sk_buff *skb; + char optionsbuffer[sizeof(struct option_mss) + + sizeof(struct option_windowscale) + + sizeof(struct option_base) + 1]; + + int optionssize = 0; + /* Sending MSS option */ + union all_known_options *options; + + if (!cm_node) + return -EINVAL; + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_mss.optionnum = OPTION_NUMBER_MSS; + options->as_mss.length = sizeof(struct option_mss); + options->as_mss.mss = htons(cm_node->tcp_cntxt.mss); + optionssize += sizeof(struct option_mss); + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_windowscale.optionnum = OPTION_NUMBER_WINDOW_SCALE; + options->as_windowscale.length = sizeof(struct option_windowscale); + options->as_windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale; + optionssize += sizeof(struct option_windowscale); + + if (sendack && !(NES_DRV_OPT_SUPRESS_OPTION_BC & nes_drv_opt) + ) { + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_base.optionnum = OPTION_NUMBER_WRITE0; + options->as_base.length = sizeof(struct option_base); + optionssize += sizeof(struct option_base); + /* we need the size to be a multiple of 4 */ + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = 1; + optionssize += 1; + } + + options = (union all_known_options *)&optionsbuffer[optionssize]; + options->as_end = OPTION_NUMBER_END; + optionssize += 1; + + skb = get_free_pkt(cm_node); + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + if (sendack) + flags |= SET_ACK; + + form_cm_frame(skb, cm_node, optionsbuffer, optionssize, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * send_reset + */ +int send_reset(struct nes_cm_node *cm_node) +{ + int ret; + struct sk_buff *skb = get_free_pkt(cm_node); + int flags = SET_RST | SET_ACK; + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + add_ref_cm_node(cm_node); + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, flags); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 1); + + return ret; +} + + +/** + * send_ack + */ +int send_ack(struct nes_cm_node *cm_node) +{ + int ret; + struct sk_buff *skb = get_free_pkt(cm_node); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 0, 0); + + return ret; +} + + +/** + * send_fin + */ +int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb) +{ + int ret; + + /* if we didn't get a frame get one */ + if (!skb) + skb = get_free_pkt(cm_node); + + if (!skb) { + nes_debug(NES_DBG_CM, "Failed to get a Free pkt\n"); + return -1; + } + + form_cm_frame(skb, cm_node, NULL, 0, NULL, 0, SET_ACK | SET_FIN); + ret = schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); + + return ret; +} + + +/** + * get_free_pkt + */ +struct sk_buff *get_free_pkt(struct nes_cm_node *cm_node) +{ + struct sk_buff *skb, *new_skb; + + /* check to see if we need to repopulate the free tx pkt queue */ + if (skb_queue_len(&cm_node->cm_core->tx_free_list) < NES_CM_FREE_PKT_LO_WATERMARK) { + while (skb_queue_len(&cm_node->cm_core->tx_free_list) < + cm_node->cm_core->free_tx_pkt_max) { + /* replace the frame we took, we won't get it back */ + new_skb = dev_alloc_skb(cm_node->cm_core->mtu); + BUG_ON(!new_skb); + /* add a replacement frame to the free tx list head */ + skb_queue_head(&cm_node->cm_core->tx_free_list, new_skb); + } + } + + skb = skb_dequeue(&cm_node->cm_core->tx_free_list); + + return skb; +} + + +/** + * make_hashkey - generate hash key from node tuple + */ +static inline int make_hashkey(u16 loc_port, nes_addr_t loc_addr, u16 rem_port, + nes_addr_t rem_addr) +{ + u32 hashkey = 0; + + hashkey = loc_addr + rem_addr + loc_port + rem_port; + hashkey = (hashkey % NES_CM_HASHTABLE_SIZE); + + return hashkey; +} + + +/** + * find_node - find a cm node that matches the reference cm node + */ +static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, + u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) +{ + unsigned long flags; + u32 hashkey; + struct list_head *list_pos; + struct list_head *hte; + struct nes_cm_node *cm_node; + + /* make a hash index key for this packet */ + hashkey = make_hashkey(loc_port, loc_addr, rem_port, rem_addr); + + /* get a handle on the hte */ + hte = &cm_core->connected_nodes; + + nes_debug(NES_DBG_CM, "Searching for an owner node:%x:%x from core %p->%p\n", + loc_addr, loc_port, cm_core, hte); + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->ht_lock, flags); + list_for_each(list_pos, hte) { + cm_node = container_of(list_pos, struct nes_cm_node, list); + /* compare quad, return node handle if a match */ + nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", + cm_node->loc_addr, cm_node->loc_port, + loc_addr, loc_port, + cm_node->rem_addr, cm_node->rem_port, + rem_addr, rem_port); + if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && + (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { + add_ref_cm_node(cm_node); + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + return cm_node; + } + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + /* no owner node */ + return NULL; +} + + +/** + * find_listener - find a cm node listening on this addr-port pair + */ +static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, + nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) +{ + unsigned long flags; + struct list_head *listen_list; + struct nes_cm_listener *listen_node; + + /* walk list and find cm_node associated with this session ID */ + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each(listen_list, &cm_core->listen_list.list) { + listen_node = container_of(listen_list, struct nes_cm_listener, list); + /* compare node pair, return node handle if a match */ + if (((listen_node->loc_addr == dst_addr) || + listen_node->loc_addr == 0x00000000) && + (listen_node->loc_port == dst_port) && + (listener_state & listen_node->listener_state)) { + atomic_inc(&listen_node->ref_count); + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return listen_node; + } + } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + nes_debug(NES_DBG_CM, "Unable to find listener- %x:%x\n", + dst_addr, dst_port); + + /* no listener */ + return NULL; +} + + +/** + * add_hte_node - add a cm node to the hash table + */ +static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) +{ + unsigned long flags; + u32 hashkey; + struct list_head *hte; + + if (!cm_node || !cm_core) + return -EINVAL; + + nes_debug(NES_DBG_CM, "Adding Node to Active Connection HT\n"); + + /* first, make an index into our hash table */ + hashkey = make_hashkey(cm_node->loc_port, cm_node->loc_addr, + cm_node->rem_port, cm_node->rem_addr); + cm_node->hashkey = hashkey; + + spin_lock_irqsave(&cm_core->ht_lock, flags); + + /* get a handle on the hash table element (list head for this slot) */ + hte = &cm_core->connected_nodes; + list_add_tail(&cm_node->list, hte); + atomic_inc(&cm_core->ht_node_cnt); + + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + return 0; +} + + +/** + * mini_cm_dec_refcnt_listen + */ +static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener, int free_hanging_nodes) +{ + int ret = 1; + unsigned long flags; + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + if (!atomic_dec_return(&listener->ref_count)) { + list_del(&listener->list); + + /* decrement our listen node count */ + atomic_dec(&cm_core->listen_node_cnt); + + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + + if (listener->nesvnic) { + nes_manage_apbvt(listener->nesvnic, listener->loc_port, + PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); + } + + nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); + + kfree(listener); + ret = 0; + cm_listens_destroyed++; + } else { + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + } + if (listener) { + if (atomic_read(&listener->pend_accepts_cnt) > 0) + nes_debug(NES_DBG_CM, "destroying listener (%p)" + " with non-zero pending accepts=%u\n", + listener, atomic_read(&listener->pend_accepts_cnt)); + } + + return ret; +} + + +/** + * mini_cm_del_listen + */ +static int mini_cm_del_listen(struct nes_cm_core *cm_core, + struct nes_cm_listener *listener) +{ + listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; + listener->cm_id = NULL; /* going to be destroyed pretty soon */ + return mini_cm_dec_refcnt_listen(cm_core, listener, 1); +} + + +/** + * mini_cm_accelerated + */ +static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, + struct nes_cm_node *cm_node) +{ + u32 was_timer_set; + cm_node->accelerated = 1; + + if (cm_node->accept_pend) { + BUG_ON(!cm_node->listener); + atomic_dec(&cm_node->listener->pend_accepts_cnt); + BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); + } + + was_timer_set = timer_pending(&cm_core->tcp_timer); + if (!was_timer_set) { + cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME; + add_timer(&cm_core->tcp_timer); + } + + return 0; +} + + +/** + * nes_addr_send_arp + */ +static void nes_addr_send_arp(u32 dst_ip) +{ + struct rtable *rt; + struct flowi fl; + + memset(&fl, 0, sizeof fl); + fl.nl_u.ip4_u.daddr = htonl(dst_ip); + if (ip_route_output_key(&init_net, &rt, &fl)) { + printk("%s: ip_route_output_key failed for 0x%08X\n", + __FUNCTION__, dst_ip); + return; + } + + neigh_event_send(rt->u.dst.neighbour, NULL); + ip_rt_put(rt); +} + + +/** + * make_cm_node - create a new instance of a cm node + */ +static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, + struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, + struct nes_cm_listener *listener) +{ + struct nes_cm_node *cm_node; + struct timespec ts; + int arpindex = 0; + struct nes_device *nesdev; + struct nes_adapter *nesadapter; + + /* create an hte and cm_node for this instance */ + cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC); + if (!cm_node) + return NULL; + + /* set our node specific transport info */ + cm_node->loc_addr = cm_info->loc_addr; + cm_node->rem_addr = cm_info->rem_addr; + cm_node->loc_port = cm_info->loc_port; + cm_node->rem_port = cm_info->rem_port; + cm_node->send_write0 = send_first; + nes_debug(NES_DBG_CM, "Make node addresses : loc = %x:%x, rem = %x:%x\n", + cm_node->loc_addr, cm_node->loc_port, cm_node->rem_addr, cm_node->rem_port); + cm_node->listener = listener; + cm_node->netdev = nesvnic->netdev; + cm_node->cm_id = cm_info->cm_id; + memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); + + nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", + cm_node->listener, cm_node->cm_id); + + INIT_LIST_HEAD(&cm_node->retrans_list); + spin_lock_init(&cm_node->retrans_list_lock); + INIT_LIST_HEAD(&cm_node->recv_list); + spin_lock_init(&cm_node->recv_list_lock); + + cm_node->loopbackpartner = NULL; + atomic_set(&cm_node->ref_count, 1); + /* associate our parent CM core */ + cm_node->cm_core = cm_core; + cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; + cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; + cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> + NES_CM_DEFAULT_RCV_WND_SCALE; + ts = current_kernel_time(); + cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); + cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - + sizeof(struct tcphdr) - ETH_HLEN; + cm_node->tcp_cntxt.rcv_nxt = 0; + /* get a unique session ID , add thread_id to an upcounter to handle race */ + atomic_inc(&cm_core->node_cnt); + atomic_inc(&cm_core->session_id); + cm_node->session_id = (u32)(atomic_read(&cm_core->session_id) + current->tgid); + cm_node->conn_type = cm_info->conn_type; + cm_node->apbvt_set = 0; + cm_node->accept_pend = 0; + + cm_node->nesvnic = nesvnic; + /* get some device handles, for arp lookup */ + nesdev = nesvnic->nesdev; + nesadapter = nesdev->nesadapter; + + cm_node->loopbackpartner = NULL; + /* get the mac addr for the remote node */ + arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); + if (arpindex < 0) { + kfree(cm_node); + nes_addr_send_arp(cm_info->rem_addr); + return NULL; + } + + /* copy the mac addr to node context */ + memcpy(cm_node->rem_mac, nesadapter->arp_table[arpindex].mac_addr, ETH_ALEN); + nes_debug(NES_DBG_CM, "Remote mac addr from arp table:%02x," + " %02x, %02x, %02x, %02x, %02x\n", + cm_node->rem_mac[0], cm_node->rem_mac[1], + cm_node->rem_mac[2], cm_node->rem_mac[3], + cm_node->rem_mac[4], cm_node->rem_mac[5]); + + add_hte_node(cm_core, cm_node); + atomic_inc(&cm_nodes_created); + + return cm_node; +} + + +/** + * add_ref_cm_node - destroy an instance of a cm node + */ +static int add_ref_cm_node(struc |