aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/s2io.c589
-rw-r--r--drivers/net/s2io.h38
2 files changed, 565 insertions, 62 deletions
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index da0d8a85acc..0db218c2dbe 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -57,16 +57,20 @@
#include <linux/ethtool.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
+#include <asm/div64.h>
/* local include */
#include "s2io.h"
#include "s2io-regs.h"
-#define DRV_VERSION "Version 2.0.9.4"
+#define DRV_VERSION "2.0.11.2"
/* S2io Driver name & version. */
static char s2io_driver_name[] = "Neterion";
@@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
{"\n DRIVER STATISTICS"},
{"single_bit_ecc_errs"},
{"double_bit_ecc_errs"},
+ ("lro_aggregated_pkts"),
+ ("lro_flush_both_count"),
+ ("lro_out_of_sequence_pkts"),
+ ("lro_flush_due_to_max_pkts"),
+ ("lro_avg_aggr_pkts"),
};
#define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
@@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts;
static unsigned int rxsync_frequency = 3;
/* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
static unsigned int intr_type = 0;
+/* Large receive offload feature */
+static unsigned int lro = 0;
+/* Max pkts to be aggregated by LRO at one time. If not specified,
+ * aggregation happens until we hit max IP pkt size(64K)
+ */
+static unsigned int lro_max_pkts = 0xFFFF;
/*
* S2IO device table.
@@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic)
writel((u32) (val64 >> 32), (add + 4));
val64 = readq(&bar0->mac_cfg);
+ /* Enable FCS stripping by adapter */
+ add = &bar0->mac_cfg;
+ val64 = readq(&bar0->mac_cfg);
+ val64 |= MAC_CFG_RMAC_STRIP_FCS;
+ if (nic->device_type == XFRAME_II_DEVICE)
+ writeq(val64, &bar0->mac_cfg);
+ else {
+ writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+ writel((u32) (val64), add);
+ writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+ writel((u32) (val64 >> 32), (add + 4));
+ }
+
/*
* Set the time value to be inserted in the pause frame
* generated by xena.
@@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data)
#ifndef CONFIG_S2IO_NAPI
int pkt_cnt = 0;
#endif
+ int i;
+
spin_lock(&nic->rx_lock);
if (atomic_read(&nic->card_state) == CARD_DOWN) {
DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
@@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data)
break;
#endif
}
+ if (nic->lro) {
+ /* Clear all LRO sessions before exiting */
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *lro = &nic->lro0_n[i];
+ if (lro->in_use) {
+ update_L3L4_header(nic, lro);
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ }
+ }
+ }
+
spin_unlock(&nic->rx_lock);
}
@@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs)
* else schedule a tasklet to reallocate the buffers.
*/
for (i = 0; i < config->rx_ring_num; i++) {
- int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
- int level = rx_buffer_level(sp, rxb_size, i);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "%s:Out of memory",
- dev->name);
- DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ if (!sp->lro) {
+ int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+ int level = rx_buffer_level(sp, rxb_size, i);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
+ dev->name);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ clear_bit(0, (&sp->tasklet_status));
+ atomic_dec(&sp->isr_cnt);
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- atomic_dec(&sp->isr_cnt);
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
+ }
+ else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+ break;
}
}
@@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs)
{
ring_info_t *ring = (ring_info_t *)dev_id;
nic_t *sp = ring->nic;
+ struct net_device *dev = (struct net_device *) dev_id;
int rxb_size, level, rng_n;
atomic_inc(&sp->isr_cnt);
rx_intr_handler(ring);
rng_n = ring->ring_no;
- rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
- level = rx_buffer_level(sp, rxb_size, rng_n);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- int ret;
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "Out of memory in %s",
- __FUNCTION__);
+ if (!sp->lro) {
+ rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
+ level = rx_buffer_level(sp, rxb_size, rng_n);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ int ret;
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "Out of memory in %s",
+ __FUNCTION__);
+ clear_bit(0, (&sp->tasklet_status));
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
}
+ else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+ }
+
atomic_dec(&sp->isr_cnt);
return IRQ_HANDLED;
@@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
*/
#ifndef CONFIG_S2IO_NAPI
for (i = 0; i < config->rx_ring_num; i++) {
- int ret;
- int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
- int level = rx_buffer_level(sp, rxb_size, i);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "%s:Out of memory",
- dev->name);
- DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ if (!sp->lro) {
+ int ret;
+ int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+ int level = rx_buffer_level(sp, rxb_size, i);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
+ dev->name);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ clear_bit(0, (&sp->tasklet_status));
+ atomic_dec(&sp->isr_cnt);
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- atomic_dec(&sp->isr_cnt);
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
+ }
+ else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
+ break;
}
}
#endif
@@ -5043,6 +5111,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
int i = 0;
nic_t *sp = dev->priv;
StatInfo_t *stat_info = sp->mac_control.stats_info;
+ u64 tmp;
s2io_updt_stats(sp);
tmp_stats[i++] =
@@ -5134,6 +5203,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
tmp_stats[i++] = 0;
tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
+ tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
+ tmp_stats[i++] = stat_info->sw_stat.sending_both;
+ tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
+ tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
+ tmp = 0;
+ if (stat_info->sw_stat.num_aggregations) {
+ tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
+ do_div(tmp, stat_info->sw_stat.num_aggregations);
+ }
+ tmp_stats[i++] = tmp;
}
static int s2io_ethtool_get_regs_len(struct net_device *dev)
@@ -5515,6 +5594,14 @@ static int s2io_card_up(nic_t * sp)
/* Setting its receive mode */
s2io_set_multicast(dev);
+ if (sp->lro) {
+ /* Initialize max aggregatable pkts based on MTU */
+ sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
+ /* Check if we can use(if specified) user provided value */
+ if (lro_max_pkts < sp->lro_max_aggr_per_sess)
+ sp->lro_max_aggr_per_sess = lro_max_pkts;
+ }
+
/* Enable tasklet for the device */
tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
@@ -5607,6 +5694,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
((unsigned long) rxdp->Host_Control);
int ring_no = ring_data->ring_no;
u16 l3_csum, l4_csum;
+ lro_t *lro;
skb->dev = dev;
if (rxdp->Control_1 & RXD_T_CODE) {
@@ -5655,7 +5743,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
skb_put(skb, buf2_len);
}
- if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+ if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
+ (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
(sp->rx_csum)) {
l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
@@ -5666,6 +5755,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
* a flag in the RxD.
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (sp->lro) {
+ u32 tcp_len;
+ u8 *tcp;
+ int ret = 0;
+
+ ret = s2io_club_tcp_session(skb->data, &tcp,
+ &tcp_len, &lro, rxdp, sp);
+ switch (ret) {
+ case 3: /* Begin anew */
+ lro->parent = skb;
+ goto aggregate;
+ case 1: /* Aggregate */
+ {
+ lro_append_pkt(sp, lro,
+ skb, tcp_len);
+ goto aggregate;
+ }
+ case 4: /* Flush session */
+ {
+ lro_append_pkt(sp, lro,
+ skb, tcp_len);
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ sp->mac_control.stats_info->
+ sw_stat.flush_max_pkts++;
+ goto aggregate;
+ }
+ case 2: /* Flush both */
+ lro->parent->data_len =
+ lro->frags_len;
+ sp->mac_control.stats_info->
+ sw_stat.sending_both++;
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ goto send_up;
+ case 0: /* sessions exceeded */
+ case 5: /*
+ * First pkt in session not
+ * L3/L4 aggregatable
+ */
+ break;
+ default:
+ DBG_PRINT(ERR_DBG,
+ "%s: Samadhana!!\n",
+ __FUNCTION__);
+ BUG();
+ }
+ }
} else {
/*
* Packet with erroneous checksum, let the
@@ -5677,25 +5814,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
skb->ip_summed = CHECKSUM_NONE;
}
- skb->protocol = eth_type_trans(skb, dev);
+ if (!sp->lro) {
+ skb->protocol = eth_type_trans(skb, dev);
#ifdef CONFIG_S2IO_NAPI
- if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
- /* Queueing the vlan frame to the upper layer */
- vlan_hwaccel_receive_skb(skb, sp->vlgrp,
- RXD_GET_VLAN_TAG(rxdp->Control_2));
- } else {
- netif_receive_skb(skb);
- }
+ if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+ /* Queueing the vlan frame to the upper layer */
+ vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+ RXD_GET_VLAN_TAG(rxdp->Control_2));
+ } else {
+ netif_receive_skb(skb);
+ }
#else
- if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
- /* Queueing the vlan frame to the upper layer */
- vlan_hwaccel_rx(skb, sp->vlgrp,
- RXD_GET_VLAN_TAG(rxdp->Control_2));
- } else {
- netif_rx(skb);
- }
+ if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+ /* Queueing the vlan frame to the upper layer */
+ vlan_hwaccel_rx(skb, sp->vlgrp,
+ RXD_GET_VLAN_TAG(rxdp->Control_2));
+ } else {
+ netif_rx(skb);
+ }
#endif
+ } else {
+send_up:
+ queue_rx_frame(skb);
+ }
dev->last_rx = jiffies;
+aggregate:
atomic_dec(&sp->rx_bufs_left[ring_no]);
return SUCCESS;
}
@@ -5807,6 +5950,8 @@ module_param(indicate_max_pkts, int, 0);
#endif
module_param(rxsync_frequency, int, 0);
module_param(intr_type, int, 0);
+module_param(lro, int, 0);
+module_param(lro_max_pkts, int, 0);
/**
* s2io_init_nic - Initialization of the adapter .
@@ -5938,6 +6083,7 @@ Defaulting to INTA\n");
else
sp->device_type = XFRAME_I_DEVICE;
+ sp->lro = lro;
/* Initialize some PCI/PCI-X fields of the NIC. */
s2io_init_pci(sp);
@@ -6241,6 +6387,10 @@ Defaulting to INTA\n");
DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
"enabled\n",dev->name);
+ if (sp->lro)
+ DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
+ dev->name);
+
/* Initialize device name */
strcpy(sp->name, dev->name);
if (sp->device_type & XFRAME_II_DEVICE)
@@ -6351,3 +6501,318 @@ static void s2io_closer(void)
module_init(s2io_starter);
module_exit(s2io_closer);
+
+static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
+ struct tcphdr **tcp, RxD_t *rxdp)
+{
+ int ip_off;
+ u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len;
+
+ if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
+ DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
+ __FUNCTION__);
+ return -1;
+ }
+
+ /* TODO:
+ * By default the VLAN field in the MAC is stripped by the card, if this
+ * feature is turned off in rx_pa_cfg register, then the ip_off field
+ * has to be shifted by a further 2 bytes
+ */
+ switch (l2_type) {
+ case 0: /* DIX type */
+ case 4: /* DIX type with VLAN */
+ ip_off = HEADER_ETHERNET_II_802_3_SIZE;
+ break;
+ /* LLC, SNAP etc are considered non-mergeable */
+ default:
+ return -1;
+ }
+
+ *ip = (struct iphdr *)((u8 *)buffer + ip_off);
+ ip_len = (u8)((*ip)->ihl);
+ ip_len <<= 2;
+ *tcp = (struct tcphdr *)((unsigned long)*ip + ip_len);
+
+ return 0;
+}
+
+static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
+ struct tcphdr *tcp)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) ||
+ (lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest))
+ return -1;
+ return 0;
+}
+
+static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
+{
+ return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2));
+}
+
+static void initiate_new_session(lro_t *lro, u8 *l2h,
+ struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ lro->l2h = l2h;
+ lro->iph = ip;
+ lro->tcph = tcp;
+ lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
+ lro->tcp_ack = ntohl(tcp->ack_seq);
+ lro->sg_num = 1;
+ lro->total_len = ntohs(ip->tot_len);
+ lro->frags_len = 0;
+ /*
+ * check if we saw TCP timestamp. Other consistency checks have
+ * already been done.
+ */
+ if (tcp->doff == 8) {
+ u32 *ptr;
+ ptr = (u32 *)(tcp+1);
+ lro->saw_ts = 1;
+ lro->cur_tsval = *(ptr+1);
+ lro->cur_tsecr = *(ptr+2);
+ }
+ lro->in_use = 1;
+}
+
+static void update_L3L4_header(nic_t *sp, lro_t *lro)
+{
+ struct iphdr *ip = lro->iph;
+ struct tcphdr *tcp = lro->tcph;
+ u16 nchk;
+ StatInfo_t *statinfo = sp->mac_control.stats_info;
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+ /* Update L3 header */
+ ip->tot_len = htons(lro->total_len);
+ ip->check = 0;
+ nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl);
+ ip->check = nchk;
+
+ /* Update L4 header */
+ tcp->ack_seq = lro->tcp_ack;
+ tcp->window = lro->window;
+
+ /* Update tsecr field if this session has timestamps enabled */
+ if (lro->saw_ts) {
+ u32 *ptr = (u32 *)(tcp + 1);
+ *(ptr+2) = lro->cur_tsecr;
+ }
+
+ /* Update counters required for calculation of
+ * average no. of packets aggregated.
+ */
+ statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
+ statinfo->sw_stat.num_aggregations++;
+}
+
+static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
+ struct tcphdr *tcp, u32 l4_pyld)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ lro->total_len += l4_pyld;
+ lro->frags_len += l4_pyld;
+ lro->tcp_next_seq += l4_pyld;
+ lro->sg_num++;
+
+ /* Update ack seq no. and window ad(from this pkt) in LRO object */
+ lro->tcp_ack = tcp->ack_seq;
+ lro->window = tcp->window;
+
+ if (lro->saw_ts) {
+ u32 *ptr;
+ /* Update tsecr and tsval from this packet */
+ ptr = (u32 *) (tcp + 1);
+ lro->cur_tsval = *(ptr + 1);
+ lro->cur_tsecr = *(ptr + 2);
+ }
+}
+
+static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
+ struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+ u8 *ptr;
+
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+ if (!tcp_pyld_len) {
+ /* Runt frame or a pure ack */
+ return -1;
+ }
+
+ if (ip->ihl != 5) /* IP has options */
+ return -1;
+
+ if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
+ !tcp->ack) {
+ /*
+ * Currently recognize only the ack control word and
+ * any other control field being set would result in
+ * flushing the LRO session
+ */
+ return -1;
+ }
+
+ /*
+ * Allow only one TCP timestamp option. Don't aggregate if
+ * any other options are detected.
+ */
+ if (tcp->doff != 5 && tcp->doff != 8)
+ return -1;
+
+ if (tcp->doff == 8) {
+ ptr = (u8 *)(tcp + 1);
+ while (*ptr == TCPOPT_NOP)
+ ptr++;
+ if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
+ return -1;
+
+ /* Ensure timestamp value increases monotonically */
+ if (l_lro)
+ if (l_lro->cur_tsval > *((u32 *)(ptr+2)))
+ return -1;
+
+ /* timestamp echo reply should be non-zero */
+ if (*((u32 *)(ptr+6)) == 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
+ RxD_t *rxdp, nic_t *sp)
+{
+ struct iphdr *ip;
+ struct tcphdr *tcph;
+ int ret = 0, i;
+
+ if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp,
+ rxdp))) {
+ DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
+ ip->saddr, ip->daddr);
+ } else {
+ return ret;
+ }
+
+ tcph = (struct tcphdr *)*tcp;
+ *tcp_len = get_l4_pyld_length(ip, tcph);
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *l_lro = &sp->lro0_n[i];
+ if (l_lro->in_use) {
+ if (check_for_socket_match(l_lro, ip, tcph))
+ continue;
+ /* Sock pair matched */
+ *lro = l_lro;
+
+ if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
+ DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
+ "0x%x, actual 0x%x\n", __FUNCTION__,
+ (*lro)->tcp_next_seq,
+ ntohl(tcph->seq));
+
+ sp->mac_control.stats_info->
+ sw_stat.outof_sequence_pkts++;
+ ret = 2;
+ break;
+ }
+
+ if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
+ ret = 1; /* Aggregate */
+ else
+ ret = 2; /* Flush both */
+ break;
+ }
+ }
+
+ if (ret == 0) {
+ /* Before searching for available LRO objects,
+ * check if the pkt is L3/L4 aggregatable. If not
+ * don't create new LRO session. Just send this
+ * packet up.
+ */
+ if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
+ return 5;
+ }
+
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *l_lro = &sp->lro0_n[i];
+ if (!(l_lro->in_use)) {
+ *lro = l_lro;
+ ret = 3; /* Begin anew */
+ break;
+ }
+ }
+ }
+
+ if (ret == 0) { /* sessions exceeded */
+ DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
+ __FUNCTION__);
+ *lro = NULL;
+ return ret;
+ }
+
+ switch (ret) {
+ case 3:
+ initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+ break;
+ case 2:
+ update_L3L4_header(sp, *lro);
+ break;
+ case 1:
+ aggregate_new_rx(*lro, ip, tcph, *tcp_len);
+ if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
+ update_L3L4_header(sp, *lro);
+ ret = 4; /* Flush the LRO */
+ }
+ break;
+ default:
+ DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
+ __FUNCTION__);
+ break;
+ }
+
+ return ret;
+}
+
+static void clear_lro_session(lro_t *lro)
+{
+ static u16 lro_struct_size = sizeof(lro_t);
+
+ memset(lro, 0, lro_struct_size);
+}
+
+static void queue_rx_frame(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+
+ skb->protocol = eth_type_trans(skb, dev);
+#ifdef CONFIG_S2IO_NAPI
+ netif_receive_skb(skb);
+#else
+ netif_rx(skb);
+#endif
+}
+
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
+ u32 tcp_len)
+{
+ struct sk_buff *tmp, *first = lro->parent;
+
+ first->len += tcp_len;
+ first->data_len = lro->frags_len;
+ skb_pull(skb, (skb->len - tcp_len));
+ if ((tmp = skb_shinfo(first)->frag_list)) {
+ while (tmp->next)
+ tmp = tmp->next;
+ tmp->next = skb;
+ }
+ else
+ skb_shinfo(first)->frag_list = skb;
+ sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
+ return;
+}
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index 68ae3365194..0a0b5b29d81 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -78,6 +78,13 @@ static int debug_level = ERR_DBG;
typedef struct {
unsigned long long single_ecc_errs;
unsigned long long double_ecc_errs;
+ /* LRO statistics */
+ unsigned long long clubbed_frms_cnt;
+ unsigned long long sending_both;
+ unsigned long long outof_sequence_pkts;
+ unsigned long long flush_max_pkts;
+ unsigned long long sum_avg_pkts_aggregated;
+ unsigned long long num_aggregations;
} swStat_t;
/* The statistics block of Xena */
@@ -680,6 +687,24 @@ struct msix_info_st {
u64 data;
};
+/* Data structure to represent a LRO session */
+typedef struct lro {
+ struct sk_buff *parent;
+ u8 *l2h;
+ struct iphdr *iph;
+ struct tcphdr *tcph;
+ u32 tcp_next_seq;
+ u32 tcp_ack;
+ int total_len;
+ int frags_len;
+ int sg_num;
+ int in_use;
+ u16 window;
+ u32 cur_tsval;
+ u32 cur_tsecr;
+ u8 saw_ts;
+}lro_t;
+
/* Structure representing one instance of the NIC */
struct s2io_nic {
int rxd_mode;
@@ -784,6 +809,13 @@ struct s2io_nic {
#define XFRAME_II_DEVICE 2
u8 device_type;
+#define MAX_LRO_SESSIONS 32
+ lro_t lro0_n[MAX_LRO_SESSIONS];
+ unsigned long clubbed_frms_cnt;
+ unsigned long sending_both;
+ u8 lro;
+ u16 lro_max_aggr_per_sess;
+
#define INTA 0
#define MSI 1
#define MSI_X 2
@@ -937,4 +969,10 @@ static void s2io_card_down(nic_t *nic);
static int s2io_card_up(nic_t *nic);
static int get_xena_rev_id(struct pci_dev *pdev);
static void restore_xmsi_data(nic_t *nic);
+
+static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp);
+static void clear_lro_session(lro_t *lro);
+static void queue_rx_frame(struct sk_buff *skb);
+static void update_L3L4_header(nic_t *sp, lro_t *lro);
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len);
#endif /* _S2IO_H */