aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/igbvf/netdev.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/intel/igbvf/netdev.c')
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c2859
1 files changed, 2859 insertions, 0 deletions
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
new file mode 100644
index 00000000000..40ed066e3ef
--- /dev/null
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -0,0 +1,2859 @@
+/*******************************************************************************
+
+ Intel(R) 82576 Virtual Function Linux driver
+ Copyright(c) 2009 - 2010 Intel Corporation.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/prefetch.h>
+
+#include "igbvf.h"
+
+#define DRV_VERSION "2.0.0-k"
+char igbvf_driver_name[] = "igbvf";
+const char igbvf_driver_version[] = DRV_VERSION;
+static const char igbvf_driver_string[] =
+ "Intel(R) Virtual Function Network Driver";
+static const char igbvf_copyright[] =
+ "Copyright (c) 2009 - 2010 Intel Corporation.";
+
+static int igbvf_poll(struct napi_struct *napi, int budget);
+static void igbvf_reset(struct igbvf_adapter *);
+static void igbvf_set_interrupt_capability(struct igbvf_adapter *);
+static void igbvf_reset_interrupt_capability(struct igbvf_adapter *);
+
+static struct igbvf_info igbvf_vf_info = {
+ .mac = e1000_vfadapt,
+ .flags = 0,
+ .pba = 10,
+ .init_ops = e1000_init_function_pointers_vf,
+};
+
+static struct igbvf_info igbvf_i350_vf_info = {
+ .mac = e1000_vfadapt_i350,
+ .flags = 0,
+ .pba = 10,
+ .init_ops = e1000_init_function_pointers_vf,
+};
+
+static const struct igbvf_info *igbvf_info_tbl[] = {
+ [board_vf] = &igbvf_vf_info,
+ [board_i350_vf] = &igbvf_i350_vf_info,
+};
+
+/**
+ * igbvf_desc_unused - calculate if we have unused descriptors
+ **/
+static int igbvf_desc_unused(struct igbvf_ring *ring)
+{
+ if (ring->next_to_clean > ring->next_to_use)
+ return ring->next_to_clean - ring->next_to_use - 1;
+
+ return ring->count + ring->next_to_clean - ring->next_to_use - 1;
+}
+
+/**
+ * igbvf_receive_skb - helper function to handle Rx indications
+ * @adapter: board private structure
+ * @status: descriptor status field as written by hardware
+ * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @skb: pointer to sk_buff to be indicated to stack
+ **/
+static void igbvf_receive_skb(struct igbvf_adapter *adapter,
+ struct net_device *netdev,
+ struct sk_buff *skb,
+ u32 status, u16 vlan)
+{
+ if (status & E1000_RXD_STAT_VP) {
+ u16 vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
+
+ __vlan_hwaccel_put_tag(skb, vid);
+ }
+ netif_receive_skb(skb);
+}
+
+static inline void igbvf_rx_checksum_adv(struct igbvf_adapter *adapter,
+ u32 status_err, struct sk_buff *skb)
+{
+ skb_checksum_none_assert(skb);
+
+ /* Ignore Checksum bit is set or checksum is disabled through ethtool */
+ if ((status_err & E1000_RXD_STAT_IXSM) ||
+ (adapter->flags & IGBVF_FLAG_RX_CSUM_DISABLED))
+ return;
+
+ /* TCP/UDP checksum error bit is set */
+ if (status_err &
+ (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+ /* let the stack verify checksum errors */
+ adapter->hw_csum_err++;
+ return;
+ }
+
+ /* It must be a TCP or UDP packet with a valid checksum */
+ if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ adapter->hw_csum_good++;
+}
+
+/**
+ * igbvf_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: address of ring structure to repopulate
+ * @cleaned_count: number of buffers to repopulate
+ **/
+static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring,
+ int cleaned_count)
+{
+ struct igbvf_adapter *adapter = rx_ring->adapter;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ union e1000_adv_rx_desc *rx_desc;
+ struct igbvf_buffer *buffer_info;
+ struct sk_buff *skb;
+ unsigned int i;
+ int bufsz;
+
+ i = rx_ring->next_to_use;
+ buffer_info = &rx_ring->buffer_info[i];
+
+ if (adapter->rx_ps_hdr_size)
+ bufsz = adapter->rx_ps_hdr_size;
+ else
+ bufsz = adapter->rx_buffer_len;
+
+ while (cleaned_count--) {
+ rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
+
+ if (adapter->rx_ps_hdr_size && !buffer_info->page_dma) {
+ if (!buffer_info->page) {
+ buffer_info->page = alloc_page(GFP_ATOMIC);
+ if (!buffer_info->page) {
+ adapter->alloc_rx_buff_failed++;
+ goto no_buffers;
+ }
+ buffer_info->page_offset = 0;
+ } else {
+ buffer_info->page_offset ^= PAGE_SIZE / 2;
+ }
+ buffer_info->page_dma =
+ dma_map_page(&pdev->dev, buffer_info->page,
+ buffer_info->page_offset,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+ }
+
+ if (!buffer_info->skb) {
+ skb = netdev_alloc_skb_ip_align(netdev, bufsz);
+ if (!skb) {
+ adapter->alloc_rx_buff_failed++;
+ goto no_buffers;
+ }
+
+ buffer_info->skb = skb;
+ buffer_info->dma = dma_map_single(&pdev->dev, skb->data,
+ bufsz,
+ DMA_FROM_DEVICE);
+ }
+ /* Refresh the desc even if buffer_addrs didn't change because
+ * each write-back erases this info. */
+ if (adapter->rx_ps_hdr_size) {
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(buffer_info->page_dma);
+ rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
+ } else {
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(buffer_info->dma);
+ rx_desc->read.hdr_addr = 0;
+ }
+
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
+ buffer_info = &rx_ring->buffer_info[i];
+ }
+
+no_buffers:
+ if (rx_ring->next_to_use != i) {
+ rx_ring->next_to_use = i;
+ if (i == 0)
+ i = (rx_ring->count - 1);
+ else
+ i--;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64). */
+ wmb();
+ writel(i, adapter->hw.hw_addr + rx_ring->tail);
+ }
+}
+
+/**
+ * igbvf_clean_rx_irq - Send received data up the network stack; legacy
+ * @adapter: board private structure
+ *
+ * the return value indicates whether actual cleaning was done, there
+ * is no guarantee that everything was cleaned
+ **/
+static bool igbvf_clean_rx_irq(struct igbvf_adapter *adapter,
+ int *work_done, int work_to_do)
+{
+ struct igbvf_ring *rx_ring = adapter->rx_ring;
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ union e1000_adv_rx_desc *rx_desc, *next_rxd;
+ struct igbvf_buffer *buffer_info, *next_buffer;
+ struct sk_buff *skb;
+ bool cleaned = false;
+ int cleaned_count = 0;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int i;
+ u32 length, hlen, staterr;
+
+ i = rx_ring->next_to_clean;
+ rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
+ staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+
+ while (staterr & E1000_RXD_STAT_DD) {
+ if (*work_done >= work_to_do)
+ break;
+ (*work_done)++;
+ rmb(); /* read descriptor and rx_buffer_info after status DD */
+
+ buffer_info = &rx_ring->buffer_info[i];
+
+ /* HW will not DMA in data larger than the given buffer, even
+ * if it parses the (NFS, of course) header to be larger. In
+ * that case, it fills the header buffer and spills the rest
+ * into the page.
+ */
+ hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info) &
+ E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
+ if (hlen > adapter->rx_ps_hdr_size)
+ hlen = adapter->rx_ps_hdr_size;
+
+ length = le16_to_cpu(rx_desc->wb.upper.length);
+ cleaned = true;
+ cleaned_count++;
+
+ skb = buffer_info->skb;
+ prefetch(skb->data - NET_IP_ALIGN);
+ buffer_info->skb = NULL;
+ if (!adapter->rx_ps_hdr_size) {
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ adapter->rx_buffer_len,
+ DMA_FROM_DEVICE);
+ buffer_info->dma = 0;
+ skb_put(skb, length);
+ goto send_up;
+ }
+
+ if (!skb_shinfo(skb)->nr_frags) {
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ adapter->rx_ps_hdr_size,
+ DMA_FROM_DEVICE);
+ skb_put(skb, hlen);
+ }
+
+ if (length) {
+ dma_unmap_page(&pdev->dev, buffer_info->page_dma,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+ buffer_info->page_dma = 0;
+
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ buffer_info->page,
+ buffer_info->page_offset,
+ length);
+
+ if ((adapter->rx_buffer_len > (PAGE_SIZE / 2)) ||
+ (page_count(buffer_info->page) != 1))
+ buffer_info->page = NULL;
+ else
+ get_page(buffer_info->page);
+
+ skb->len += length;
+ skb->data_len += length;
+ skb->truesize += length;
+ }
+send_up:
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
+ next_rxd = IGBVF_RX_DESC_ADV(*rx_ring, i);
+ prefetch(next_rxd);
+ next_buffer = &rx_ring->buffer_info[i];
+
+ if (!(staterr & E1000_RXD_STAT_EOP)) {
+ buffer_info->skb = next_buffer->skb;
+ buffer_info->dma = next_buffer->dma;
+ next_buffer->skb = skb;
+ next_buffer->dma = 0;
+ goto next_desc;
+ }
+
+ if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
+ dev_kfree_skb_irq(skb);
+ goto next_desc;
+ }
+
+ total_bytes += skb->len;
+ total_packets++;
+
+ igbvf_rx_checksum_adv(adapter, staterr, skb);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ igbvf_receive_skb(adapter, netdev, skb, staterr,
+ rx_desc->wb.upper.vlan);
+
+next_desc:
+ rx_desc->wb.upper.status_error = 0;
+
+ /* return some buffers to hardware, one at a time is too slow */
+ if (cleaned_count >= IGBVF_RX_BUFFER_WRITE) {
+ igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ /* use prefetched values */
+ rx_desc = next_rxd;
+ buffer_info = next_buffer;
+
+ staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+ }
+
+ rx_ring->next_to_clean = i;
+ cleaned_count = igbvf_desc_unused(rx_ring);
+
+ if (cleaned_count)
+ igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
+
+ adapter->total_rx_packets += total_packets;
+ adapter->total_rx_bytes += total_bytes;
+ adapter->net_stats.rx_bytes += total_bytes;
+ adapter->net_stats.rx_packets += total_packets;
+ return cleaned;
+}
+
+static void igbvf_put_txbuf(struct igbvf_adapter *adapter,
+ struct igbvf_buffer *buffer_info)
+{
+ if (buffer_info->dma) {
+ if (buffer_info->mapped_as_page)
+ dma_unmap_page(&adapter->pdev->dev,
+ buffer_info->dma,
+ buffer_info->length,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_single(&adapter->pdev->dev,
+ buffer_info->dma,
+ buffer_info->length,
+ DMA_TO_DEVICE);
+ buffer_info->dma = 0;
+ }
+ if (buffer_info->skb) {
+ dev_kfree_skb_any(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+ buffer_info->time_stamp = 0;
+}
+
+/**
+ * igbvf_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+int igbvf_setup_tx_resources(struct igbvf_adapter *adapter,
+ struct igbvf_ring *tx_ring)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int size;
+
+ size = sizeof(struct igbvf_buffer) * tx_ring->count;
+ tx_ring->buffer_info = vzalloc(size);
+ if (!tx_ring->buffer_info)
+ goto err;
+
+ /* round up to nearest 4K */
+ tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
+ tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+ tx_ring->desc = dma_alloc_coherent(&pdev->dev, tx_ring->size,
+ &tx_ring->dma, GFP_KERNEL);
+
+ if (!tx_ring->desc)
+ goto err;
+
+ tx_ring->adapter = adapter;
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ return 0;
+err:
+ vfree(tx_ring->buffer_info);
+ dev_err(&adapter->pdev->dev,
+ "Unable to allocate memory for the transmit descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igbvf_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int igbvf_setup_rx_resources(struct igbvf_adapter *adapter,
+ struct igbvf_ring *rx_ring)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int size, desc_len;
+
+ size = sizeof(struct igbvf_buffer) * rx_ring->count;
+ rx_ring->buffer_info = vzalloc(size);
+ if (!rx_ring->buffer_info)
+ goto err;
+
+ desc_len = sizeof(union e1000_adv_rx_desc);
+
+ /* Round up to nearest 4K */
+ rx_ring->size = rx_ring->count * desc_len;
+ rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+ rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
+ &rx_ring->dma, GFP_KERNEL);
+
+ if (!rx_ring->desc)
+ goto err;
+
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+
+ rx_ring->adapter = adapter;
+
+ return 0;
+
+err:
+ vfree(rx_ring->buffer_info);
+ rx_ring->buffer_info = NULL;
+ dev_err(&adapter->pdev->dev,
+ "Unable to allocate memory for the receive descriptor ring\n");
+ return -ENOMEM;
+}
+
+/**
+ * igbvf_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void igbvf_clean_tx_ring(struct igbvf_ring *tx_ring)
+{
+ struct igbvf_adapter *adapter = tx_ring->adapter;
+ struct igbvf_buffer *buffer_info;
+ unsigned long size;
+ unsigned int i;
+
+ if (!tx_ring->buffer_info)
+ return;
+
+ /* Free all the Tx ring sk_buffs */
+ for (i = 0; i < tx_ring->count; i++) {
+ buffer_info = &tx_ring->buffer_info[i];
+ igbvf_put_txbuf(adapter, buffer_info);
+ }
+
+ size = sizeof(struct igbvf_buffer) * tx_ring->count;
+ memset(tx_ring->buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(tx_ring->desc, 0, tx_ring->size);
+
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
+
+ writel(0, adapter->hw.hw_addr + tx_ring->head);
+ writel(0, adapter->hw.hw_addr + tx_ring->tail);
+}
+
+/**
+ * igbvf_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: ring to free resources from
+ *
+ * Free all transmit software resources
+ **/
+void igbvf_free_tx_resources(struct igbvf_ring *tx_ring)
+{
+ struct pci_dev *pdev = tx_ring->adapter->pdev;
+
+ igbvf_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->buffer_info);
+ tx_ring->buffer_info = NULL;
+
+ dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
+ tx_ring->dma);
+
+ tx_ring->desc = NULL;
+}
+
+/**
+ * igbvf_clean_rx_ring - Free Rx Buffers per Queue
+ * @adapter: board private structure
+ **/
+static void igbvf_clean_rx_ring(struct igbvf_ring *rx_ring)
+{
+ struct igbvf_adapter *adapter = rx_ring->adapter;
+ struct igbvf_buffer *buffer_info;
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long size;
+ unsigned int i;
+
+ if (!rx_ring->buffer_info)
+ return;
+
+ /* Free all the Rx ring sk_buffs */
+ for (i = 0; i < rx_ring->count; i++) {
+ buffer_info = &rx_ring->buffer_info[i];
+ if (buffer_info->dma) {
+ if (adapter->rx_ps_hdr_size){
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ adapter->rx_ps_hdr_size,
+ DMA_FROM_DEVICE);
+ } else {
+ dma_unmap_single(&pdev->dev, buffer_info->dma,
+ adapter->rx_buffer_len,
+ DMA_FROM_DEVICE);
+ }
+ buffer_info->dma = 0;
+ }
+
+ if (buffer_info->skb) {
+ dev_kfree_skb(buffer_info->skb);
+ buffer_info->skb = NULL;
+ }
+
+ if (buffer_info->page) {
+ if (buffer_info->page_dma)
+ dma_unmap_page(&pdev->dev,
+ buffer_info->page_dma,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+ put_page(buffer_info->page);
+ buffer_info->page = NULL;
+ buffer_info->page_dma = 0;
+ buffer_info->page_offset = 0;
+ }
+ }
+
+ size = sizeof(struct igbvf_buffer) * rx_ring->count;
+ memset(rx_ring->buffer_info, 0, size);
+
+ /* Zero out the descriptor ring */
+ memset(rx_ring->desc, 0, rx_ring->size);
+
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
+
+ writel(0, adapter->hw.hw_addr + rx_ring->head);
+ writel(0, adapter->hw.hw_addr + rx_ring->tail);
+}
+
+/**
+ * igbvf_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+
+void igbvf_free_rx_resources(struct igbvf_ring *rx_ring)
+{
+ struct pci_dev *pdev = rx_ring->adapter->pdev;
+
+ igbvf_clean_rx_ring(rx_ring);
+
+ vfree(rx_ring->buffer_info);
+ rx_ring->buffer_info = NULL;
+
+ dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
+ rx_ring->dma);
+ rx_ring->desc = NULL;
+}
+
+/**
+ * igbvf_update_itr - update the dynamic ITR value based on statistics
+ * @adapter: pointer to adapter
+ * @itr_setting: current adapter->itr
+ * @packets: the number of packets during this measurement interval
+ * @bytes: the number of bytes during this measurement interval
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput. This functionality is controlled
+ * by the InterruptThrottleRate module parameter.
+ **/
+static unsigned int igbvf_update_itr(struct igbvf_adapter *adapter,
+ u16 itr_setting, int packets,
+ int bytes)
+{
+ unsigned int retval = itr_setting;
+
+ if (packets == 0)
+ goto update_itr_done;
+
+ switch (itr_setting) {
+ case lowest_latency:
+ /* handle TSO and jumbo frames */
+ if (bytes/packets > 8000)
+ retval = bulk_latency;
+ else if ((packets < 5) && (bytes > 512))
+ retval = low_latency;
+ break;
+ case low_latency: /* 50 usec aka 20000 ints/s */
+ if (bytes > 10000) {
+ /* this if handles the TSO accounting */
+ if (bytes/packets > 8000)
+ retval = bulk_latency;
+ else if ((packets < 10) || ((bytes/packets) > 1200))
+ retval = bulk_latency;
+ else if ((packets > 35))
+ retval = lowest_latency;
+ } else if (bytes/packets > 2000) {
+ retval = bulk_latency;
+ } else if (packets <= 2 && bytes < 512) {
+ retval = lowest_latency;
+ }
+ break;
+ case bulk_latency: /* 250 usec aka 4000 ints/s */
+ if (bytes > 25000) {
+ if (packets > 35)
+ retval = low_latency;
+ } else if (bytes < 6000) {
+ retval = low_latency;
+ }
+ break;
+ }
+
+update_itr_done:
+ return retval;
+}
+
+static void igbvf_set_itr(struct igbvf_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u16 current_itr;
+ u32 new_itr = adapter->itr;
+
+ adapter->tx_itr = igbvf_update_itr(adapter, adapter->tx_itr,
+ adapter->total_tx_packets,
+ adapter->total_tx_bytes);
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency)
+ adapter->tx_itr = low_latency;
+
+ adapter->rx_itr = igbvf_update_itr(adapter, adapter->rx_itr,
+ adapter->total_rx_packets,
+ adapter->total_rx_bytes);
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency)
+ adapter->rx_itr = low_latency;
+
+ current_itr = max(adapter->rx_itr, adapter->tx_itr);
+
+ switch (current_itr) {
+ /* counts and packets in update_itr are dependent on these numbers */
+ case lowest_latency:
+ new_itr = 70000;
+ break;
+ case low_latency:
+ new_itr = 20000; /* aka hwitr = ~200 */
+ break;
+ case bulk_latency:
+ new_itr = 4000;
+ break;
+ default:
+ break;
+ }
+
+ if (new_itr != adapter->itr) {
+ /*
+ * this attempts to bias the interrupt rate towards Bulk
+ * by adding intermediate steps when interrupt rate is
+ * increasing
+ */
+ new_itr = new_itr > adapter->itr ?
+ min(adapter->itr + (new_itr >> 2), new_itr) :
+ new_itr;
+ adapter->itr = new_itr;
+ adapter->rx_ring->itr_val = 1952;
+
+ if (adapter->msix_entries)
+ adapter->rx_ring->set_itr = 1;
+ else
+ ew32(ITR, 1952);
+ }
+}
+
+/**
+ * igbvf_clean_tx_irq - Reclaim resources after transmit completes
+ * @adapter: board private structure
+ * returns true if ring is completely cleaned
+ **/
+static bool igbvf_clean_tx_irq(struct igbvf_ring *tx_ring)
+{
+ struct igbvf_adapter *adapter = tx_ring->adapter;
+ struct net_device *netdev = adapter->netdev;
+ struct igbvf_buffer *buffer_info;
+ struct sk_buff *skb;
+ union e1000_adv_tx_desc *tx_desc, *eop_desc;
+ unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int i, eop, count = 0;
+ bool cleaned = false;
+
+ i = tx_ring->next_to_clean;
+ eop = tx_ring->buffer_info[i].next_to_watch;
+ eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
+
+ while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
+ (count < tx_ring->count)) {
+ rmb(); /* read buffer_info after eop_desc status */
+ for (cleaned = false; !cleaned; count++) {
+ tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
+ buffer_info = &tx_ring->buffer_info[i];
+ cleaned = (i == eop);
+ skb = buffer_info->skb;
+
+ if (skb) {
+ unsigned int segs, bytecount;
+
+ /* gso_segs is currently only valid for tcp */
+ segs = skb_shinfo(skb)->gso_segs ?: 1;
+ /* multiply data chunks by size of headers */
+ bytecount = ((segs - 1) * skb_headlen(skb)) +
+ skb->len;
+ total_packets += segs;
+ total_bytes += bytecount;
+ }
+
+ igbvf_put_txbuf(adapter, buffer_info);
+ tx_desc->wb.status = 0;
+
+ i++;
+ if (i == tx_ring->count)
+ i = 0;
+ }
+ eop = tx_ring->buffer_info[i].next_to_watch;
+ eop_desc = IGBVF_TX_DESC_ADV(*tx_ring, eop);
+ }
+
+ tx_ring->next_to_clean = i;
+
+ if (unlikely(count &&
+ netif_carrier_ok(netdev) &&
+ igbvf_desc_unused(tx_ring) >= IGBVF_TX_QUEUE_WAKE)) {
+ /* Make sure that anybody stopping the queue after this
+ * sees the new next_to_clean.
+ */
+ smp_mb();
+ if (netif_queue_stopped(netdev) &&
+ !(test_bit(__IGBVF_DOWN, &adapter->state))) {
+ netif_wake_queue(netdev);
+ ++adapter->restart_queue;
+ }
+ }
+
+ adapter->net_stats.tx_bytes += total_bytes;
+ adapter->net_stats.tx_packets += total_packets;
+ return count < tx_ring->count;
+}
+
+static irqreturn_t igbvf_msix_other(int irq, void *data)
+{
+ struct net_device *netdev = data;
+ struct igbvf_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+
+ adapter->int_counter1++;
+
+ netif_carrier_off(netdev);
+ hw->mac.get_link_status = 1;
+ if (!test_bit(__IGBVF_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+
+ ew32(EIMS, adapter->eims_other);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t igbvf_intr_msix_tx(int irq, void *data)
+{
+ struct net_device *netdev = data;
+ struct igbvf_adapter *adapter = netdev_priv(netdev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct igbvf_ring *tx_ring = adapter->tx_ring;
+
+
+ adapter->total_tx_bytes = 0;
+ adapter->total_tx_packets = 0;
+
+ /* auto mask will automatically reenable the interrupt when we write
+ * EICS */
+ if (!igbvf_clean_tx_irq(tx_ring))
+ /* Ring was not completely cleaned, so fire another interrupt */
+ ew32(EICS, tx_ring->eims_value);
+ else
+ ew32(EIMS, tx_ring->eims_value);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t igbvf_intr_msix_rx(int irq, void *data)
+{
+ struct net_device *netdev = data;
+ struct igbvf_adapter *adapter = netdev_priv(netdev);
+
+ adapter->int_counter0++;
+
+ /* Write the ITR value calculated at the end of the
+ * previous interrupt.
+ */
+ if (adapter->rx_ring->set_itr) {
+ writel(adapter->rx_ring->itr_val,
+ adapter->hw.hw_addr + adapter->rx_ring->itr_register);
+ adapter->rx_ring->set_itr = 0;
+ }
+
+ if (napi_schedule_prep(&adapter->rx_ring->napi)) {
+ adapter->total_rx_bytes = 0;
+ adapter->total_rx_packets = 0;
+ __napi_schedule(&adapter->rx_ring->napi);
+ }
+
+ return IRQ_HANDLED;
+}
+
+#define IGBVF_NO_QUEUE -1
+
+static void igbvf_assign_vector(struct igbvf_adapter *adapter, int rx_queue,
+ int tx_queue, int msix_vector)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ u32 ivar, index;
+
+ /* 82576 uses a table-based method for assigning vectors.
+ Each queue has a single entry in the table to which we write
+ a vector number along with a "valid" bit. Sadly, the layout
+ of the table is somewhat counterintuitive. */
+ if (rx_queue > IGBVF_NO_QUEUE) {
+ index = (rx_queue >> 1);
+ ivar = array_er32(IVAR0, index);
+ if (rx_queue & 0x1) {
+ /* vector goes into third byte of register */
+ ivar = ivar & 0xFF00FFFF;
+ ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
+ } else {
+ /* vector goes into low byte of register */
+ ivar = ivar & 0xFFFFFF00;
+ ivar |= msix_vector | E1000_IVAR_VALID;
+ }
+ adapter->rx_ring[rx_queue].eims_value = 1 << msix_vector;
+ array_ew32(IVAR0, index, ivar);
+ }
+ if (tx_queue > IGBVF_NO_QUEUE) {
+ index = (tx_queue >> 1);
+ ivar = array_er32(IVAR0, index);
+ if (tx_queue & 0x1) {
+ /* vector goes into high byte of register */
+ ivar = ivar & 0x00FFFFFF;
+ ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
+ } else {
+ /* vector goes into second byte of register */
+ ivar = ivar & 0xFFFF00FF;
+ ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
+ }
+ adapter->tx_ring[tx_queue].eims_value = 1 << msix_vector;
+ array_ew32(IVAR0, index, ivar);
+ }
+}
+
+/**
+ * igbvf_configure_msix - Configure MSI-X hardware
+ *
+ * igbvf_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ **/
+static void igbvf_configure_msix(struct igbvf_adapter *adapter)
+{
+ u32 tmp;
+ struct e1000_hw *hw = &adapter->hw;
+ struct igbvf_ring *tx_ring = adapter->tx_ring;
+ struct igbvf_ring *rx_ring = adapter->rx_ring;
+ int vector = 0;
+
+ adapter->eims_enable_mask = 0;
+
+ igbvf_assign_vector(adapter, IGBVF_NO_QUEUE, 0, vector++);
+ adapter->eims_enable_mask |= tx_ring->eims_value;
+ if (tx_ring->itr_val)
+ writel(tx_ring->itr_val,
+ hw->hw_addr + tx_ring->itr_register);
+ else
+ writel(1952, hw->hw_addr + tx_ring->itr_register);
+
+ igbvf_assign_vector(adapter, 0, IGBVF_NO_QUEUE, vector++);
+ adapter->eims_enable_mask |= rx_ring->eims_value;
+ if (rx_ring->itr_val)
+ writel(rx_ring->itr_val,
+ hw->hw_addr + rx_ring->itr_register);
+ else
+ writel(1952, hw->hw_addr + rx_ring->itr_register);
+
+ /* set vector for other causes, i.e. link changes */
+
+ tmp = (vector++ | E1000_IVAR_VALID);
+
+ ew32(IVAR_MISC, tmp);
+
+ adapter->eims_enable_mask = (1 << (vector)) - 1;
+ adapter->eims_other = 1 << (vector - 1);
+ e1e_flush();
+}
+
+static void igbvf_reset_interrupt_capability(struct igbvf_adapter *adapter)
+{
+ if (adapter->msix_entries) {
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ }
+}
+
+/**
+ * igbvf_set_interrupt_capability - set MSI or MSI-X if supported
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static void igbvf_set_interrupt_capability(struct igbvf_adapter *adapter)
+{
+ int err = -ENOMEM;
+ int i;
+
+ /* we allocate 3 vectors, 1 for tx, 1 for rx, one for pf messages */
+ adapter->msix_entries = kcalloc(3, sizeof(struct msix_entry),
+ GFP_KERNEL);
+ if (adapter->msix_entries) {
+ for (i = 0; i < 3; i++)
+ adapter->msix_entries[i].entry = i;
+
+ err = pci_enable_msix(adapter->pdev,
+ adapter->msix_entries, 3);
+ }
+
+ if (err) {
+ /* MSI-X failed */
+ dev_err(&adapter->pdev->dev,
+ "Failed to initialize MSI-X interrupts.\n");
+ igbvf_reset_interrupt_capability(adapter);
+ }
+}
+
+/**
+ * igbvf_request_msix - Initialize MSI-X interrupts
+ *
+ * igbvf_request_msix allocates MSI-X vectors and requests interrupts from the
+ * kernel.
+ **/
+static int igbvf_request_msix(struct igbvf_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int err = 0, vector = 0;
+
+ if (strlen(netdev->name) < (IFNAMSIZ - 5)) {
+ sprintf(adapter->tx_ring->name, "%s-tx-0", netdev->name);
+ sprintf(adapter->rx_ring->name, "%s-rx-0", netdev->name);
+ } else {
+ memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
+ memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
+ }
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igbvf_intr_msix_tx, 0, adapter->tx_ring->name,
+ netdev);
+ if (err)
+ goto out;
+
+ adapter->tx_ring->itr_register = E1000_EITR(vector);
+ adapter->tx_ring->itr_val = 1952;
+ vector++;
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igbvf_intr_msix_rx, 0, adapter->rx_ring->name,
+ netdev);
+ if (err)
+ goto out;
+
+ adapter->rx_ring->itr_register = E1000_EITR(vector);
+ adapter->rx_ring->itr_val = 1952;
+ vector++;
+
+ err = request_irq(adapter->msix_entries[vector].vector,
+ igbvf_msix_other, 0, netdev->name, netdev);
+ if (err)
+ goto out;
+
+ igbvf_configure_msix(adapter);
+ return 0;
+out:
+ return err;
+}
+
+/**
+ * igbvf_alloc_queues - Allocate memory for all rings
+ * @adapter: board private structure to initialize
+ **/
+static int __devinit igbvf_alloc_queues(struct igbvf_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+
+ adapter->tx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
+ if (!adapter->tx_ring)
+ return -ENOMEM;
+
+ adapter->rx_ring = kzalloc(sizeof(struct igbvf_ring), GFP_KERNEL);
+ if (!adapter->rx_ring) {
+ kfree(adapter->tx_ring);
+ return -ENOMEM;
+ }
+
+ netif_napi_add(netdev, &adapter->rx_ring->napi, igbvf_poll, 64);
+
+ return 0;
+}
+
+/**
+ * igbvf_request_irq - initialize interrupts
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int igbvf_request_irq(struct igbvf_adapter *adapter)
+{
+ int err = -1;
+
+ /* igbvf supports msi-x only */
+ if (adapter->msix_entries)
+ err = igbvf_request_msix(adapter);
+
+ if (!err)
+ return err;
+
+ dev_err(&adapter->pdev->dev,
+ "Unable to allocate interrupt, Error: %d\n", err);
+
+ return err;
+}
+
+static void igbvf_free_irq(struct igbvf_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int vector;
+
+ if (adapter->msix_entries) {
+ for (vector = 0; vector < 3; vector++)
+ free_irq(adapter->msix_entries[vector].vector, netdev);
+ }
+}
+
+/**
+ * igbvf_irq_disable - Mask off interrupt generation on the NIC
+ **/
+static void igbvf_irq_disable(struct igbvf_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ ew32(EIMC, ~0);
+
+ if (adapter->msix_entries)
+ ew32(EIAC, 0);
+}
+
+/**
+ * igbvf_irq_enable - Enable default interrupt generation settings
+ **/
+static void igbvf_irq_enable(struct igbvf_adapter *adapter)
+{
+ struct e1000_hw *hw = &adapter->hw;
+
+ ew32(EIAC, adapter->eims_enable_mask);
+ ew32(EIAM, adapter->eims_enable_mask);
+ ew32(EIMS, adapter->eims_enable_mask);
+}
+
+/**
+ * igbvf_poll - NAPI Rx polling callback
+ * @napi: struct associated with this polling callback
+ * @budget: amount of packets driver is allowed to process this poll
+ **/
+static int igbvf_poll(struct napi_struct *napi, int budget)
+{
+ struct igbvf_ring *rx_ring = container_of(napi, struct igbvf_ring, napi);
+ struct igbvf_adapter *adapter = rx_ring->adapter;
+ struct e1000_hw *hw = &adapter->hw;
+ int work_do