diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ixgbe/ixgbe_main.c')
| -rw-r--r-- | drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6107 |
1 files changed, 3436 insertions, 2671 deletions
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3dc6cef5810..f5aa3311ea2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1,7 +1,7 @@ /******************************************************************************* Intel 10 Gigabit PCI Express Linux driver - Copyright(c) 1999 - 2012 Intel Corporation. + Copyright(c) 1999 - 2014 Intel Corporation. This program is free software; you can redistribute it and/or modify it under the terms and conditions of the GNU General Public License, @@ -20,6 +20,7 @@ the file called "COPYING". Contact Information: + Linux NICS <linux.nics@intel.com> e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 @@ -44,6 +45,8 @@ #include <linux/ethtool.h> #include <linux/if.h> #include <linux/if_vlan.h> +#include <linux/if_macvlan.h> +#include <linux/if_bridge.h> #include <linux/prefetch.h> #include <scsi/fc/fc_fcoe.h> @@ -55,16 +58,17 @@ char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = "Intel(R) 10 Gigabit PCI Express Network Driver"; +#ifdef IXGBE_FCOE char ixgbe_default_device_descr[] = "Intel(R) 10 Gigabit Network Connection"; -#define MAJ 3 -#define MIN 6 -#define BUILD 7 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ - __stringify(BUILD) "-k" +#else +static char ixgbe_default_device_descr[] = + "Intel(R) 10 Gigabit Network Connection"; +#endif +#define DRV_VERSION "3.19.1-k" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = - "Copyright (c) 1999-2012 Intel Corporation."; + "Copyright (c) 1999-2014 Intel Corporation."; static const struct ixgbe_info *ixgbe_info_tbl[] = { [board_82598] = &ixgbe_82598_info, @@ -107,8 +111,10 @@ static DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T), board_X540 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2), board_82599 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS), board_82599 }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP), board_82599 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP), board_82599 }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP), board_82599 }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T1), board_X540 }, /* required last entry */ {0, } }; @@ -128,29 +134,275 @@ static struct notifier_block dca_notifier = { static unsigned int max_vfs; module_param(max_vfs, uint, 0); MODULE_PARM_DESC(max_vfs, - "Maximum number of virtual functions to allocate per physical function"); + "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)"); #endif /* CONFIG_PCI_IOV */ +static unsigned int allow_unsupported_sfp; +module_param(allow_unsupported_sfp, uint, 0); +MODULE_PARM_DESC(allow_unsupported_sfp, + "Allow unsupported and untested SFP+ modules on 82599-based adapters"); + +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); -#define DEFAULT_DEBUG_LEVEL_SHIFT 3 +static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); + +static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter, + u32 reg, u16 *value) +{ + struct pci_dev *parent_dev; + struct pci_bus *parent_bus; + + parent_bus = adapter->pdev->bus->parent; + if (!parent_bus) + return -1; + + parent_dev = parent_bus->self; + if (!parent_dev) + return -1; + + if (!pci_is_pcie(parent_dev)) + return -1; + + pcie_capability_read_word(parent_dev, reg, value); + if (*value == IXGBE_FAILED_READ_CFG_WORD && + ixgbe_check_cfg_remove(&adapter->hw, parent_dev)) + return -1; + return 0; +} + +static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u16 link_status = 0; + int err; + + hw->bus.type = ixgbe_bus_type_pci_express; + + /* Get the negotiated link width and speed from PCI config space of the + * parent, as this device is behind a switch + */ + err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status); + + /* assume caller will handle error case */ + if (err) + return err; + + hw->bus.width = ixgbe_convert_bus_width(link_status); + hw->bus.speed = ixgbe_convert_bus_speed(link_status); + + return 0; +} + +/** + * ixgbe_check_from_parent - Determine whether PCIe info should come from parent + * @hw: hw specific details + * + * This function is used by probe to determine whether a device's PCI-Express + * bandwidth details should be gathered from the parent bus instead of from the + * device. Used to ensure that various locations all have the correct device ID + * checks. + */ +static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw) +{ + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_SFP_SF_QP: + case IXGBE_DEV_ID_82599_QSFP_SF_QP: + return true; + default: + return false; + } +} + +static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter, + int expected_gts) +{ + int max_gts = 0; + enum pci_bus_speed speed = PCI_SPEED_UNKNOWN; + enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN; + struct pci_dev *pdev; + + /* determine whether to use the the parent device + */ + if (ixgbe_pcie_from_parent(&adapter->hw)) + pdev = adapter->pdev->bus->parent->self; + else + pdev = adapter->pdev; + + if (pcie_get_minimum_link(pdev, &speed, &width) || + speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { + e_dev_warn("Unable to determine PCI Express bandwidth.\n"); + return; + } + + switch (speed) { + case PCIE_SPEED_2_5GT: + /* 8b/10b encoding reduces max throughput by 20% */ + max_gts = 2 * width; + break; + case PCIE_SPEED_5_0GT: + /* 8b/10b encoding reduces max throughput by 20% */ + max_gts = 4 * width; + break; + case PCIE_SPEED_8_0GT: + /* 128b/130b encoding reduces throughput by less than 2% */ + max_gts = 8 * width; + break; + default: + e_dev_warn("Unable to determine PCI Express bandwidth.\n"); + return; + } + + e_dev_info("PCI Express bandwidth of %dGT/s available\n", + max_gts); + e_dev_info("(Speed:%s, Width: x%d, Encoding Loss:%s)\n", + (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : + speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : + speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : + "Unknown"), + width, + (speed == PCIE_SPEED_2_5GT ? "20%" : + speed == PCIE_SPEED_5_0GT ? "20%" : + speed == PCIE_SPEED_8_0GT ? "<2%" : + "Unknown")); + + if (max_gts < expected_gts) { + e_dev_warn("This is not sufficient for optimal performance of this card.\n"); + e_dev_warn("For optimal performance, at least %dGT/s of bandwidth is required.\n", + expected_gts); + e_dev_warn("A slot with more lanes and/or higher speed is suggested.\n"); + } +} static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter) { if (!test_bit(__IXGBE_DOWN, &adapter->state) && + !test_bit(__IXGBE_REMOVING, &adapter->state) && !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state)) schedule_work(&adapter->service_task); } +static void ixgbe_remove_adapter(struct ixgbe_hw *hw) +{ + struct ixgbe_adapter *adapter = hw->back; + + if (!hw->hw_addr) + return; + hw->hw_addr = NULL; + e_dev_err("Adapter removed\n"); + if (test_bit(__IXGBE_SERVICE_INITED, &adapter->state)) + ixgbe_service_event_schedule(adapter); +} + +static void ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg) +{ + u32 value; + + /* The following check not only optimizes a bit by not + * performing a read on the status register when the + * register just read was a status register read that + * returned IXGBE_FAILED_READ_REG. It also blocks any + * potential recursion. + */ + if (reg == IXGBE_STATUS) { + ixgbe_remove_adapter(hw); + return; + } + value = ixgbe_read_reg(hw, IXGBE_STATUS); + if (value == IXGBE_FAILED_READ_REG) + ixgbe_remove_adapter(hw); +} + +/** + * ixgbe_read_reg - Read from device register + * @hw: hw specific details + * @reg: offset of register to read + * + * Returns : value read or IXGBE_FAILED_READ_REG if removed + * + * This function is used to read device registers. It checks for device + * removal by confirming any read that returns all ones by checking the + * status register value for all ones. This function avoids reading from + * the hardware if a removal was previously detected in which case it + * returns IXGBE_FAILED_READ_REG (all ones). + */ +u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg) +{ + u8 __iomem *reg_addr = ACCESS_ONCE(hw->hw_addr); + u32 value; + + if (ixgbe_removed(reg_addr)) + return IXGBE_FAILED_READ_REG; + value = readl(reg_addr + reg); + if (unlikely(value == IXGBE_FAILED_READ_REG)) + ixgbe_check_remove(hw, reg); + return value; +} + +static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev) +{ + u16 value; + + pci_read_config_word(pdev, PCI_VENDOR_ID, &value); + if (value == IXGBE_FAILED_READ_CFG_WORD) { + ixgbe_remove_adapter(hw); + return true; + } + return false; +} + +u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg) +{ + struct ixgbe_adapter *adapter = hw->back; + u16 value; + + if (ixgbe_removed(hw->hw_addr)) + return IXGBE_FAILED_READ_CFG_WORD; + pci_read_config_word(adapter->pdev, reg, &value); + if (value == IXGBE_FAILED_READ_CFG_WORD && + ixgbe_check_cfg_remove(hw, adapter->pdev)) + return IXGBE_FAILED_READ_CFG_WORD; + return value; +} + +#ifdef CONFIG_PCI_IOV +static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg) +{ + struct ixgbe_adapter *adapter = hw->back; + u32 value; + + if (ixgbe_removed(hw->hw_addr)) + return IXGBE_FAILED_READ_CFG_DWORD; + pci_read_config_dword(adapter->pdev, reg, &value); + if (value == IXGBE_FAILED_READ_CFG_DWORD && + ixgbe_check_cfg_remove(hw, adapter->pdev)) + return IXGBE_FAILED_READ_CFG_DWORD; + return value; +} +#endif /* CONFIG_PCI_IOV */ + +void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value) +{ + struct ixgbe_adapter *adapter = hw->back; + + if (ixgbe_removed(hw->hw_addr)) + return; + pci_write_config_word(adapter->pdev, reg, value); +} + static void ixgbe_service_event_complete(struct ixgbe_adapter *adapter) { BUG_ON(!test_bit(__IXGBE_SERVICE_SCHED, &adapter->state)); /* flush memory to make sure state is correct before next watchdog */ - smp_mb__before_clear_bit(); + smp_mb__before_atomic(); clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); } @@ -284,7 +536,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) struct ixgbe_reg_info *reginfo; int n = 0; struct ixgbe_ring *tx_ring; - struct ixgbe_tx_buffer *tx_buffer_info; + struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; struct my_u0 { u64 a; u64 b; } *u0; struct ixgbe_ring *rx_ring; @@ -321,17 +573,18 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) goto exit; dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); - pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); + pr_info(" %s %s %s %s\n", + "Queue [NTU] [NTC] [bi(ntc)->dma ]", + "leng", "ntw", "timestamp"); for (n = 0; n < adapter->num_tx_queues; n++) { tx_ring = adapter->tx_ring[n]; - tx_buffer_info = - &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; - pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", + tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; + pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp); + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp); } /* Print TX Rings */ @@ -342,13 +595,37 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) /* Transmit Descriptor Formats * - * Advanced Transmit Descriptor + * 82598 Advanced Transmit Descriptor * +--------------------------------------------------------------+ * 0 | Buffer Address [63:0] | * +--------------------------------------------------------------+ - * 8 | PAYLEN | PORTS | IDX | STA | DCMD |DTYP | RSV | DTALEN | + * 8 | PAYLEN | POPTS | IDX | STA | DCMD |DTYP | RSV | DTALEN | * +--------------------------------------------------------------+ * 63 46 45 40 39 36 35 32 31 24 23 20 19 0 + * + * 82598 Advanced Transmit Descriptor (Write-Back Format) + * +--------------------------------------------------------------+ + * 0 | RSV [63:0] | + * +--------------------------------------------------------------+ + * 8 | RSV | STA | NXTSEQ | + * +--------------------------------------------------------------+ + * 63 36 35 32 31 0 + * + * 82599+ Advanced Transmit Descriptor + * +--------------------------------------------------------------+ + * 0 | Buffer Address [63:0] | + * +--------------------------------------------------------------+ + * 8 |PAYLEN |POPTS|CC|IDX |STA |DCMD |DTYP |MAC |RSV |DTALEN | + * +--------------------------------------------------------------+ + * 63 46 45 40 39 38 36 35 32 31 24 23 20 19 18 17 16 15 0 + * + * 82599+ Advanced Transmit Descriptor (Write-Back Format) + * +--------------------------------------------------------------+ + * 0 | RSV [63:0] | + * +--------------------------------------------------------------+ + * 8 | RSV | STA | RSV | + * +--------------------------------------------------------------+ + * 63 36 35 32 31 0 */ for (n = 0; n < adapter->num_tx_queues; n++) { @@ -356,39 +633,43 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter) pr_info("------------------------------------\n"); pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("T [desc] [address 63:0 ] " - "[PlPOIdStDDt Ln] [bi->dma ] " - "leng ntw timestamp bi->skb\n"); + pr_info("%s%s %s %s %s %s\n", + "T [desc] [address 63:0 ] ", + "[PlPOIdStDDt Ln] [bi->dma ] ", + "leng", "ntw", "timestamp", "bi->skb"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i); - tx_buffer_info = &tx_ring->tx_buffer_info[i]; + tx_desc = IXGBE_TX_DESC(tx_ring, i); + tx_buffer = &tx_ring->tx_buffer_info[i]; u0 = (struct my_u0 *)tx_desc; - pr_info("T [0x%03X] %016llX %016llX %016llX" - " %04X %p %016llX %p", i, - le64_to_cpu(u0->a), - le64_to_cpu(u0->b), - (u64)tx_buffer_info->dma, - tx_buffer_info->length, - tx_buffer_info->next_to_watch, - (u64)tx_buffer_info->time_stamp, - tx_buffer_info->skb); - if (i == tx_ring->next_to_use && - i == tx_ring->next_to_clean) - pr_cont(" NTC/U\n"); - else if (i == tx_ring->next_to_use) - pr_cont(" NTU\n"); - else if (i == tx_ring->next_to_clean) - pr_cont(" NTC\n"); - else - pr_cont("\n"); - - if (netif_msg_pktdata(adapter) && - tx_buffer_info->dma != 0) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(tx_buffer_info->dma), - tx_buffer_info->length, true); + if (dma_unmap_len(tx_buffer, len) > 0) { + pr_info("T [0x%03X] %016llX %016llX %016llX %08X %p %016llX %p", + i, + le64_to_cpu(u0->a), + le64_to_cpu(u0->b), + (u64)dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + tx_buffer->next_to_watch, + (u64)tx_buffer->time_stamp, + tx_buffer->skb); + if (i == tx_ring->next_to_use && + i == tx_ring->next_to_clean) + pr_cont(" NTC/U\n"); + else if (i == tx_ring->next_to_use) + pr_cont(" NTU\n"); + else if (i == tx_ring->next_to_clean) + pr_cont(" NTC\n"); + else + pr_cont("\n"); + + if (netif_msg_pktdata(adapter) && + tx_buffer->skb) + print_hex_dump(KERN_INFO, "", + DUMP_PREFIX_ADDRESS, 16, 1, + tx_buffer->skb->data, + dma_unmap_len(tx_buffer, len), + true); + } } } @@ -408,7 +689,9 @@ rx_ring_summary: dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); - /* Advanced Receive Descriptor (Read) Format + /* Receive Descriptor Formats + * + * 82598 Advanced Receive Descriptor (Read) Format * 63 1 0 * +-----------------------------------------------------+ * 0 | Packet Buffer Address [63:1] |A0/NSE| @@ -417,32 +700,57 @@ rx_ring_summary: * +-----------------------------------------------------+ * * - * Advanced Receive Descriptor (Write-Back) Format + * 82598 Advanced Receive Descriptor (Write-Back) Format * * 63 48 47 32 31 30 21 20 16 15 4 3 0 * +------------------------------------------------------+ - * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS | - * | Checksum Ident | | | | Type | Type | + * 0 | RSS Hash / |SPH| HDR_LEN | RSV |Packet| RSS | + * | Packet | IP | | | | Type | Type | + * | Checksum | Ident | | | | | | * +------------------------------------------------------+ * 8 | VLAN Tag | Length | Extended Error | Extended Status | * +------------------------------------------------------+ * 63 48 47 32 31 20 19 0 + * + * 82599+ Advanced Receive Descriptor (Read) Format + * 63 1 0 + * +-----------------------------------------------------+ + * 0 | Packet Buffer Address [63:1] |A0/NSE| + * +----------------------------------------------+------+ + * 8 | Header Buffer Address [63:1] | DD | + * +-----------------------------------------------------+ + * + * + * 82599+ Advanced Receive Descriptor (Write-Back) Format + * + * 63 48 47 32 31 30 21 20 17 16 4 3 0 + * +------------------------------------------------------+ + * 0 |RSS / Frag Checksum|SPH| HDR_LEN |RSC- |Packet| RSS | + * |/ RTT / PCoE_PARAM | | | CNT | Type | Type | + * |/ Flow Dir Flt ID | | | | | | + * +------------------------------------------------------+ + * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP | + * +------------------------------------------------------+ + * 63 48 47 32 31 20 19 0 */ + for (n = 0; n < adapter->num_rx_queues; n++) { rx_ring = adapter->rx_ring[n]; pr_info("------------------------------------\n"); pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("R [desc] [ PktBuf A0] " - "[ HeadBuf DD] [bi->dma ] [bi->skb] " + pr_info("%s%s%s", + "R [desc] [ PktBuf A0] ", + "[ HeadBuf DD] [bi->dma ] [bi->skb ] ", "<-- Adv Rx Read format\n"); - pr_info("RWB[desc] [PcsmIpSHl PtRs] " - "[vl er S cks ln] ---------------- [bi->skb] " + pr_info("%s%s%s", + "RWB[desc] [PcsmIpSHl PtRs] ", + "[vl er S cks ln] ---------------- [bi->skb ] ", "<-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { rx_buffer_info = &rx_ring->rx_buffer_info[i]; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); + rx_desc = IXGBE_RX_DESC(rx_ring, i); u0 = (struct my_u0 *)rx_desc; staterr = le32_to_cpu(rx_desc->wb.upper.status_error); if (staterr & IXGBE_RXD_STAT_DD) { @@ -460,21 +768,13 @@ rx_ring_summary: (u64)rx_buffer_info->dma, rx_buffer_info->skb); - if (netif_msg_pktdata(adapter)) { + if (netif_msg_pktdata(adapter) && + rx_buffer_info->dma) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt(rx_buffer_info->dma), - rx_ring->rx_buf_len, true); - - if (rx_ring->rx_buf_len - < IXGBE_RXBUFFER_2K) - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - rx_buffer_info->page_dma + - rx_buffer_info->page_offset - ), - PAGE_SIZE/2, true); + page_address(rx_buffer_info->page) + + rx_buffer_info->page_offset, + ixgbe_rx_bufsz(rx_ring), true); } } @@ -512,7 +812,7 @@ static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter) ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD); } -/* +/** * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors * @adapter: pointer to adapter struct * @direction: 0 for Rx, 1 for Tx, -1 for other causes @@ -584,81 +884,96 @@ static inline void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, } } -static inline void ixgbe_unmap_tx_resource(struct ixgbe_ring *ring, - struct ixgbe_tx_buffer *tx_buffer) +void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *tx_buffer) { - if (tx_buffer->dma) { - if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); - else + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); } - tx_buffer->dma = 0; + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* tx_buffer must be completely set up in the transmit path */ } -void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring, - struct ixgbe_tx_buffer *tx_buffer_info) +static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) { - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info->skb) - dev_kfree_skb_any(tx_buffer_info->skb); - tx_buffer_info->skb = NULL; - /* tx_buffer_info must be completely set up in the transmit path */ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + int i; + u32 data; + + if ((hw->fc.current_mode != ixgbe_fc_full) && + (hw->fc.current_mode != ixgbe_fc_rx_pause)) + return; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + break; + default: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } + hwstats->lxoffrxc += data; + + /* refill credits (no tx hang) if we received xoff */ + if (!data) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) + clear_bit(__IXGBE_HANG_CHECK_ARMED, + &adapter->tx_ring[i]->state); } static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_hw_stats *hwstats = &adapter->stats; - u32 data = 0; u32 xoff[8] = {0}; + u8 tc; int i; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; - if ((hw->fc.current_mode == ixgbe_fc_full) || - (hw->fc.current_mode == ixgbe_fc_rx_pause)) { - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); - break; - default: - data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); - } - hwstats->lxoffrxc += data; - - /* refill credits (no tx hang) if we received xoff */ - if (!data) - return; + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); - for (i = 0; i < adapter->num_tx_queues; i++) - clear_bit(__IXGBE_HANG_CHECK_ARMED, - &adapter->tx_ring[i]->state); - return; - } else if (!(adapter->dcb_cfg.pfc_mode_enable)) + if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) { + ixgbe_update_xoff_rx_lfc(adapter); return; + } /* update stats for each tc, only valid with PFC enabled */ for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + u32 pxoffrxc; + switch (hw->mac.type) { case ixgbe_mac_82598EB: - xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); + pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); break; default: - xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); + pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); } - hwstats->pxoffrxc[i] += xoff[i]; + hwstats->pxoffrxc[i] += pxoffrxc; + /* Get the TC for given UP */ + tc = netdev_get_prio_tc_map(adapter->netdev, i); + xoff[tc] += pxoffrxc; } /* disarm tx queues that have received xoff frames */ for (i = 0; i < adapter->num_tx_queues; i++) { struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; - u8 tc = tx_ring->dcb_tc; + tc = tx_ring->dcb_tc; if (xoff[tc]) clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); } @@ -666,16 +981,23 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring) { - return ring->tx_stats.completed; + return ring->stats.packets; } static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) { - struct ixgbe_adapter *adapter = netdev_priv(ring->netdev); - struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_adapter *adapter; + struct ixgbe_hw *hw; + u32 head, tail; - u32 head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx)); - u32 tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx)); + if (ring->l2_accel_priv) + adapter = ring->l2_accel_priv->real_adapter; + else + adapter = netdev_priv(ring->netdev); + + hw = &adapter->hw; + head = IXGBE_READ_REG(hw, IXGBE_TDH(ring->reg_idx)); + tail = IXGBE_READ_REG(hw, IXGBE_TDT(ring->reg_idx)); if (head != tail) return (head < tail) ? @@ -729,6 +1051,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) /* Do the reset outside of interrupt context */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) { adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + e_warn(drv, "initiating reset due to tx timeout\n"); ixgbe_service_event_schedule(adapter); } } @@ -746,56 +1069,88 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, union ixgbe_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; - u16 i = tx_ring->next_to_clean; + unsigned int i = tx_ring->next_to_clean; + + if (test_bit(__IXGBE_DOWN, &adapter->state)) + return true; tx_buffer = &tx_ring->tx_buffer_info[i]; - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i); + tx_desc = IXGBE_TX_DESC(tx_ring, i); + i -= tx_ring->count; - for (; budget; budget--) { + do { union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) break; - /* count the packet as being completed */ - tx_ring->tx_stats.completed++; - /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; - /* prevent any other reads prior to eop_desc being verified */ - rmb(); + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; - do { - ixgbe_unmap_tx_resource(tx_ring, tx_buffer); - tx_desc->wb.status = 0; - if (likely(tx_desc == eop_desc)) { - eop_desc = NULL; - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - } + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { tx_buffer++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; - + if (unlikely(!i)) { + i -= tx_ring->count; tx_buffer = tx_ring->tx_buffer_info; - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0); + tx_desc = IXGBE_TX_DESC(tx_ring, 0); } - } while (eop_desc); - } + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + } + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; tx_ring->next_to_clean = i; u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; @@ -807,7 +1162,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ struct ixgbe_hw *hw = &adapter->hw; - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i); e_err(drv, "Detected Tx Unit Hang\n" " Tx Queue <%d>\n" " TDH, TDT <%x>, <%x>\n" @@ -835,6 +1189,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, return true; } + netdev_tx_completed_queue(txring_txq(tx_ring), + total_packets, total_bytes); + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -842,9 +1199,11 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, * sees the new next_to_clean. */ smp_mb(); - if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__IXGBE_DOWN, &adapter->state)) { - netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) + && !test_bit(__IXGBE_DOWN, &adapter->state)) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; } } @@ -853,63 +1212,67 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, } #ifdef CONFIG_IXGBE_DCA -static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter, - struct ixgbe_ring *rx_ring, +static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *tx_ring, int cpu) { struct ixgbe_hw *hw = &adapter->hw; - u32 rxctrl; - u8 reg_idx = rx_ring->reg_idx; + u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); + u16 reg_offset; - rxctrl = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(reg_idx)); switch (hw->mac.type) { case ixgbe_mac_82598EB: - rxctrl &= ~IXGBE_DCA_RXCTRL_CPUID_MASK; - rxctrl |= dca3_get_tag(rx_ring->dev, cpu); + reg_offset = IXGBE_DCA_TXCTRL(tx_ring->reg_idx); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: - rxctrl &= ~IXGBE_DCA_RXCTRL_CPUID_MASK_82599; - rxctrl |= (dca3_get_tag(rx_ring->dev, cpu) << - IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599); + reg_offset = IXGBE_DCA_TXCTRL_82599(tx_ring->reg_idx); + txctrl <<= IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599; break; default: - break; + /* for unknown hardware do not write register */ + return; } - rxctrl |= IXGBE_DCA_RXCTRL_DESC_DCA_EN; - rxctrl |= IXGBE_DCA_RXCTRL_HEAD_DCA_EN; - rxctrl &= ~(IXGBE_DCA_RXCTRL_DESC_RRO_EN); - IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(reg_idx), rxctrl); + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + txctrl |= IXGBE_DCA_TXCTRL_DESC_RRO_EN | + IXGBE_DCA_TXCTRL_DATA_RRO_EN | + IXGBE_DCA_TXCTRL_DESC_DCA_EN; + + IXGBE_WRITE_REG(hw, reg_offset, txctrl); } -static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter, - struct ixgbe_ring *tx_ring, +static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *rx_ring, int cpu) { struct ixgbe_hw *hw = &adapter->hw; - u32 txctrl; - u8 reg_idx = tx_ring->reg_idx; + u32 rxctrl = dca3_get_tag(rx_ring->dev, cpu); + u8 reg_idx = rx_ring->reg_idx; + switch (hw->mac.type) { - case ixgbe_mac_82598EB: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(reg_idx)); - txctrl &= ~IXGBE_DCA_TXCTRL_CPUID_MASK; - txctrl |= dca3_get_tag(tx_ring->dev, cpu); - txctrl |= IXGBE_DCA_TXCTRL_DESC_DCA_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(reg_idx), txctrl); - break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: - txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(reg_idx)); - txctrl &= ~IXGBE_DCA_TXCTRL_CPUID_MASK_82599; - txctrl |= (dca3_get_tag(tx_ring->dev, cpu) << - IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599); - txctrl |= IXGBE_DCA_TXCTRL_DESC_DCA_EN; - IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(reg_idx), txctrl); + rxctrl <<= IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599; break; default: break; } + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + rxctrl |= IXGBE_DCA_RXCTRL_DESC_RRO_EN | + IXGBE_DCA_RXCTRL_DESC_DCA_EN; + + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(reg_idx), rxctrl); } static void ixgbe_update_dca(struct ixgbe_q_vector *q_vector) @@ -921,10 +1284,10 @@ static void ixgbe_update_dca(struct ixgbe_q_vector *q_vector) if (q_vector->cpu == cpu) goto out_no_update; - for (ring = q_vector->tx.ring; ring != NULL; ring = ring->next) + ixgbe_for_each_ring(ring, q_vector->tx) ixgbe_update_tx_dca(adapter, ring, cpu); - for (ring = q_vector->rx.ring; ring != NULL; ring = ring->next) + ixgbe_for_each_ring(ring, q_vector->rx) ixgbe_update_rx_dca(adapter, ring, cpu); q_vector->cpu = cpu; @@ -934,7 +1297,6 @@ out_no_update: static void ixgbe_setup_dca(struct ixgbe_adapter *adapter) { - int num_q_vectors; int i; if (!(adapter->flags & IXGBE_FLAG_DCA_ENABLED)) @@ -943,12 +1305,7 @@ static void ixgbe_setup_dca(struct ixgbe_adapter *adapter) /* always use CB2 mode, difference is masked in the CB driver */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 2); - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - else - num_q_vectors = 1; - - for (i = 0; i < num_q_vectors; i++) { + for (i = 0; i < adapter->num_q_vectors; i++) { adapter->q_vector[i]->cpu = -1; ixgbe_update_dca(adapter->q_vector[i]); } @@ -984,99 +1341,76 @@ static int __ixgbe_notify_dca(struct device *dev, void *data) return 0; } -#endif /* CONFIG_IXGBE_DCA */ -static inline void ixgbe_rx_hash(union ixgbe_adv_rx_desc *rx_desc, +#endif /* CONFIG_IXGBE_DCA */ +static inline void ixgbe_rx_hash(struct ixgbe_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + if (ring->netdev->features & NETIF_F_RXHASH) + skb_set_hash(skb, + le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); } +#ifdef IXGBE_FCOE /** * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type - * @adapter: address of board private structure + * @ring: structure containing ring specific data * @rx_desc: advanced rx descriptor * * Returns : true if it is FCoE pkt */ -static inline bool ixgbe_rx_is_fcoe(struct ixgbe_adapter *adapter, +static inline bool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring, union ixgbe_adv_rx_desc *rx_desc) { __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - return (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && + return test_bit(__IXGBE_RX_FCOE, &ring->state) && ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_ETQF_MASK)) == (cpu_to_le16(IXGBE_ETQF_FILTER_FCOE << IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT))); } -/** - * ixgbe_receive_skb - Send a completed packet up the stack - * @adapter: board private structure - * @skb: packet to send up - * @status: hardware indication of status of receive - * @rx_ring: rx descriptor ring (for a specific queue) to setup - * @rx_desc: rx descriptor - **/ -static void ixgbe_receive_skb(struct ixgbe_q_vector *q_vector, - struct sk_buff *skb, u8 status, - struct ixgbe_ring *ring, - union ixgbe_adv_rx_desc *rx_desc) -{ - struct ixgbe_adapter *adapter = q_vector->adapter; - struct napi_struct *napi = &q_vector->napi; - bool is_vlan = (status & IXGBE_RXD_STAT_VP); - u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan); - - if (is_vlan && (tag & VLAN_VID_MASK)) - __vlan_hwaccel_put_tag(skb, tag); - - if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) - napi_gro_receive(napi, skb); - else - netif_rx(skb); -} - +#endif /* IXGBE_FCOE */ /** * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum - * @adapter: address of board private structure - * @status_err: hardware indication of status of receive + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed * @skb: skb currently being received and modified - * @status_err: status error value of last descriptor in packet **/ -static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter, +static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb, - u32 status_err) + struct sk_buff *skb) { - skb->ip_summed = CHECKSUM_NONE; + skb_checksum_none_assert(skb); /* Rx csum disabled */ - if (!(adapter->flags & IXGBE_FLAG_RX_CSUM_ENABLED)) + if (!(ring->netdev->features & NETIF_F_RXCSUM)) return; /* if IP and error */ - if ((status_err & IXGBE_RXD_STAT_IPCS) && - (status_err & IXGBE_RXDADV_ERR_IPE)) { - adapter->hw_csum_rx_error++; + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && + ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { + ring->rx_stats.csum_err++; return; } - if (!(status_err & IXGBE_RXD_STAT_L4CS)) + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) return; - if (status_err & IXGBE_RXDADV_ERR_TCPE) { - u16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; /* * 82599 errata, UDP frames with a 0 checksum can be marked as * checksum errors. */ - if ((pkt_info & IXGBE_RXDADV_PKTTYPE_UDP) && - (adapter->hw.mac.type == ixgbe_mac_82599EB)) + if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) && + test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state)) return; - adapter->hw_csum_rx_error++; + ring->rx_stats.csum_err++; return; } @@ -1086,6 +1420,10 @@ static inline void ixgbe_rx_checksum(struct ixgbe_adapter *adapter, static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) { + rx_ring->next_to_use = val; + + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = val; /* * Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1093,11 +1431,54 @@ static inline void ixgbe_release_rx_desc(struct ixgbe_ring *rx_ring, u32 val) * such as IA-64). */ wmb(); - writel(val, rx_ring->tail); + ixgbe_write_tail(rx_ring, val); +} + +static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma = bi->dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(dma)) + return true; + + /* alloc new page for storage */ + if (likely(!page)) { + page = __skb_alloc_pages(GFP_ATOMIC | __GFP_COLD | __GFP_COMP, + bi->skb, ixgbe_rx_pg_order(rx_ring)); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_rx_page_failed++; + return false; + } + bi->page = page; + } + + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_pages(page, ixgbe_rx_pg_order(rx_ring)); + bi->page = NULL; + + rx_ring->rx_stats.alloc_rx_page_failed++; + return false; + } + + bi->dma = dma; + bi->page_offset = 0; + + return true; } /** - * ixgbe_alloc_rx_buffers - Replace used receive buffers; packet split + * ixgbe_alloc_rx_buffers - Replace used receive buffers * @rx_ring: ring to place buffers on * @cleaned_count: number of buffers to replace **/ @@ -1105,327 +1486,636 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; - struct sk_buff *skb; u16 i = rx_ring->next_to_use; - /* do nothing if no valid netdev defined */ - if (!rx_ring->netdev) + /* nothing to do */ + if (!cleaned_count) return; - while (cleaned_count--) { - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - skb = bi->skb; - - if (!skb) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - rx_ring->rx_buf_len); - if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - goto no_buffers; - } - /* initialize queue mapping */ - skb_record_rx_queue(skb, rx_ring->queue_index); - bi->skb = skb; - } + rx_desc = IXGBE_RX_DESC(rx_ring, i); + bi = &rx_ring->rx_buffer_info[i]; + i -= rx_ring->count; - if (!bi->dma) { - bi->dma = dma_map_single(rx_ring->dev, - skb->data, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, bi->dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - bi->dma = 0; - goto no_buffers; - } + do { + if (!ixgbe_alloc_mapped_page(rx_ring, bi)) + break; + + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + + rx_desc++; + bi++; + i++; + if (unlikely(!i)) { + rx_desc = IXGBE_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buffer_info; + i -= rx_ring->count; } - if (ring_is_ps_enabled(rx_ring)) { - if (!bi->page) { - bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD); - if (!bi->page) { - rx_ring->rx_stats.alloc_rx_page_failed++; - goto no_buffers; - } - } + /* clear the hdr_addr for the next_to_use descriptor */ + rx_desc->read.hdr_addr = 0; - if (!bi->page_dma) { - /* use a half page if we're re-using */ - bi->page_offset ^= PAGE_SIZE / 2; - bi->page_dma = dma_map_page(rx_ring->dev, - bi->page, - bi->page_offset, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - if (dma_mapping_error(rx_ring->dev, - bi->page_dma)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - bi->page_dma = 0; - goto no_buffers; - } - } + cleaned_count--; + } while (cleaned_count); - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - } else { - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); - rx_desc->read.hdr_addr = 0; - } + i += rx_ring->count; - i++; - if (i == rx_ring->count) - i = 0; + if (rx_ring->next_to_use != i) + ixgbe_release_rx_desc(rx_ring, i); +} + +/** + * ixgbe_get_headlen - determine size of header for RSC/LRO/GRO/FCOE + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in + * + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, GRO, and RSC offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. + **/ +static unsigned int ixgbe_get_headlen(unsigned char *data, + unsigned int max_len) +{ + union { + unsigned char *network; + /* l2 headers */ + struct ethhdr *eth; + struct vlan_hdr *vlan; + /* l3 headers */ + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } hdr; + __be16 protocol; + u8 nexthdr = 0; /* default to not TCP */ + u8 hlen; + + /* this should never happen, but better safe than sorry */ + if (max_len < ETH_HLEN) + return max_len; + + /* initialize network frame pointer */ + hdr.network = data; + + /* set first protocol and move network header forward */ + protocol = hdr.eth->h_proto; + hdr.network += ETH_HLEN; + + /* handle any vlan tag if present */ + if (protocol == htons(ETH_P_8021Q)) { + if ((hdr.network - data) > (max_len - VLAN_HLEN)) + return max_len; + + protocol = hdr.vlan->h_vlan_encapsulated_proto; + hdr.network += VLAN_HLEN; } -no_buffers: - if (rx_ring->next_to_use != i) { - rx_ring->next_to_use = i; - ixgbe_release_rx_desc(rx_ring, i); + /* handle L3 protocols */ + if (protocol == htons(ETH_P_IP)) { + if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) + return max_len; + + /* access ihl as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct iphdr)) + return hdr.network - data; + + /* record next protocol if header is present */ + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) + nexthdr = hdr.ipv4->protocol; + } else if (protocol == htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hlen = sizeof(struct ipv6hdr); +#ifdef IXGBE_FCOE + } else if (protocol == htons(ETH_P_FCOE)) { + if ((hdr.network - data) > (max_len - FCOE_HEADER_LEN)) + return max_len; + hlen = FCOE_HEADER_LEN; +#endif + } else { + return hdr.network - data; } + + /* relocate pointer to start of L4 header */ + hdr.network += hlen; + + /* finally sort out TCP/UDP */ + if (nexthdr == IPPROTO_TCP) { + if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) + return max_len; + + /* access doff as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[12] & 0xF0) >> 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct tcphdr)) + return hdr.network - data; + + hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); + } + + /* + * If everything has gone correctly hdr.network should be the + * data section of the packet and will be the end of the header. + * If not then it probably represents the end of the last recognized + * header. + */ + if ((hdr.network - data) < max_len) + return hdr.network - data; + else + return max_len; } -static inline u16 ixgbe_get_hlen(union ixgbe_adv_rx_desc *rx_desc) +static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, + struct sk_buff *skb) { - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. - */ - u16 hdr_info = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info); - u16 hlen = (hdr_info & IXGBE_RXDADV_HDRBUFLEN_MASK) >> - IXGBE_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IXGBE_RX_HDR_SIZE) - hlen = IXGBE_RX_HDR_SIZE; - return hlen; + u16 hdr_len = skb_headlen(skb); + + /* set gso_size to avoid messing up TCP MSS */ + skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len), + IXGBE_CB(skb)->append_cnt); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; +} + +static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if append_cnt is 0 then frame is not RSC */ + if (!IXGBE_CB(skb)->append_cnt) + return; + + rx_ring->rx_stats.rsc_count += IXGBE_CB(skb)->append_cnt; + rx_ring->rx_stats.rsc_flush++; + + ixgbe_set_rsc_gso_size(rx_ring, skb); + + /* gso_size is computed using append_cnt so always clear it last */ + IXGBE_CB(skb)->append_cnt = 0; } /** - * ixgbe_transform_rsc_queue - change rsc queue into a full packet - * @skb: pointer to the last skb in the rsc queue + * ixgbe_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated * - * This function changes a queue full of hw rsc buffers into a completed - * packet. It uses the ->prev pointers to find the first packet and then - * turns it into the frag list owner. + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. **/ -static inline struct sk_buff *ixgbe_transform_rsc_queue(struct sk_buff *skb) +static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - unsigned int frag_list_size = 0; - unsigned int skb_cnt = 1; + struct net_device *dev = rx_ring->netdev; + + ixgbe_update_rsc_stats(rx_ring, skb); + + ixgbe_rx_hash(rx_ring, rx_desc, skb); + + ixgbe_rx_checksum(rx_ring, rx_desc, skb); - while (skb->prev) { - struct sk_buff *prev = skb->prev; - frag_list_size += skb->len; - skb->prev = NULL; - skb = prev; - skb_cnt++; + if (unlikely(ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) + ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector->adapter, skb); + + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && + ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } - skb_shinfo(skb)->frag_list = skb->next; - skb->next = NULL; - skb->len += frag_list_size; - skb->data_len += frag_list_size; - skb->truesize += frag_list_size; - IXGBE_RSC_CB(skb)->skb_cnt = skb_cnt; + skb_record_rx_queue(skb, rx_ring->queue_index); - return skb; + skb->protocol = eth_type_trans(skb, dev); } -static inline bool ixgbe_get_rsc_state(union ixgbe_adv_rx_desc *rx_desc) +static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, + struct sk_buff *skb) { - return !!(le32_to_cpu(rx_desc->wb.lower.lo_dword.data) & - IXGBE_RXDADV_RSCCNT_MASK); + struct ixgbe_adapter *adapter = q_vector->adapter; + + if (ixgbe_qv_busy_polling(q_vector)) + netif_receive_skb(skb); + else if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) + napi_gro_receive(&q_vector->napi, skb); + else + netif_rx(skb); } -static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, - struct ixgbe_ring *rx_ring, - int budget) +/** + * ixgbe_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: Current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - struct ixgbe_adapter *adapter = q_vector->adapter; - union ixgbe_adv_rx_desc *rx_desc, *next_rxd; - struct ixgbe_rx_buffer *rx_buffer_info, *next_buffer; - struct sk_buff *skb; - unsigned int total_rx_bytes = 0, total_rx_packets = 0; - const int current_node = numa_node_id(); -#ifdef IXGBE_FCOE - int ddp_bytes = 0; -#endif /* IXGBE_FCOE */ - u32 staterr; - u16 i; - u16 cleaned_count = 0; - bool pkt_is_rsc = false; - - i = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC_ADV(rx_ring, i); - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); - - while (staterr & IXGBE_RXD_STAT_DD) { - u32 upper_len = 0; - - rmb(); /* read descriptor and rx_buffer_info after status DD */ - - rx_buffer_info = &rx_ring->rx_buffer_info[i]; - - skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - prefetch(skb->data); - - if (ring_is_rsc_enabled(rx_ring)) - pkt_is_rsc = ixgbe_get_rsc_state(rx_desc); - - /* linear means we are building an skb from multiple pages */ - if (!skb_is_nonlinear(skb)) { - u16 hlen; - if (pkt_is_rsc && - !(staterr & IXGBE_RXD_STAT_EOP) && - !skb->prev) { - /* - * When HWRSC is enabled, delay unmapping - * of the first packet. It carries the - * header information, HW may still - * access the header after the writeback. - * Only unmap it when EOP is reached - */ - IXGBE_RSC_CB(skb)->delay_unmap = true; - IXGBE_RSC_CB(skb)->dma = rx_buffer_info->dma; - } else { - dma_unmap_single(rx_ring->dev, - rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - } - rx_buffer_info->dma = 0; + u32 ntc = rx_ring->next_to_clean + 1; - if (ring_is_ps_enabled(rx_ring)) { - hlen = ixgbe_get_hlen(rx_desc); - upper_len = le16_to_cpu(rx_desc->wb.upper.length); - } else { - hlen = le16_to_cpu(rx_desc->wb.upper.length); - } + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; - skb_put(skb, hlen); - } else { - /* assume packet split since header is unmapped */ - upper_len = le16_to_cpu(rx_desc->wb.upper.length); - } + prefetch(IXGBE_RX_DESC(rx_ring, ntc)); - if (upper_len) { - dma_unmap_page(rx_ring->dev, - rx_buffer_info->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer_info->page, - rx_buffer_info->page_offset, - upper_len); - - if ((page_count(rx_buffer_info->page) == 1) && - (page_to_nid(rx_buffer_info->page) == current_node)) - get_page(rx_buffer_info->page); - else - rx_buffer_info->page = NULL; + /* update RSC append count if present */ + if (ring_is_rsc_enabled(rx_ring)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); - skb->len += upper_len; - skb->data_len += upper_len; - skb->truesize += PAGE_SIZE / 2; + rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; + IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; + + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; } + } - i++; - if (i == rx_ring->count) - i = 0; + /* if we are the last buffer then there is nothing else to do */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; - next_rxd = IXGBE_RX_DESC_ADV(rx_ring, i); - prefetch(next_rxd); - cleaned_count++; + /* place skb in next buffer to be received */ + rx_ring->rx_buffer_info[ntc].skb = skb; + rx_ring->rx_stats.non_eop_descs++; - if (pkt_is_rsc) { - u32 nextp = (staterr & IXGBE_RXDADV_NEXTP_MASK) >> - IXGBE_RXDADV_NEXTP_SHIFT; - next_buffer = &rx_ring->rx_buffer_info[nextp]; - } else { - next_buffer = &rx_ring->rx_buffer_info[i]; - } + return true; +} - if (!(staterr & IXGBE_RXD_STAT_EOP)) { - if (ring_is_ps_enabled(rx_ring)) { - rx_buffer_info->skb = next_buffer->skb; - rx_buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - } else { - skb->next = next_buffer->skb; - skb->next->prev = skb; - } - rx_ring->rx_stats.non_eop_descs++; - goto next_desc; - } +/** + * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being adjusted + * + * This function is an ixgbe specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; - if (skb->prev) { - skb = ixgbe_transform_rsc_queue(skb); - /* if we got here without RSC the packet is invalid */ - if (!pkt_is_rsc) { - __pskb_trim(skb, 0); - rx_buffer_info->skb = skb; - goto next_desc; - } - } + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); - if (ring_is_rsc_enabled(rx_ring)) { - if (IXGBE_RSC_CB(skb)->delay_unmap) { - dma_unmap_single(rx_ring->dev, - IXGBE_RSC_CB(skb)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_RSC_CB(skb)->dma = 0; - IXGBE_RSC_CB(skb)->delay_unmap = false; - } - } - if (pkt_is_rsc) { - if (ring_is_ps_enabled(rx_ring)) - rx_ring->rx_stats.rsc_count += - skb_shinfo(skb)->nr_frags; - else - rx_ring->rx_stats.rsc_count += - IXGBE_RSC_CB(skb)->skb_cnt; - rx_ring->rx_stats.rsc_flush++; - } + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); - /* ERR_MASK will only have valid bits if EOP set */ - if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) { - dev_kfree_skb_any(skb); - goto next_desc; - } + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - ixgbe_rx_checksum(adapter, rx_desc, skb, staterr); - if (adapter->netdev->features & NETIF_F_RXHASH) - ixgbe_rx_hash(rx_desc, skb); + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - total_rx_packets++; +/** + * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being updated + * + * This function provides a basic DMA sync up for the first fragment of an + * skb. The reason for doing this is that the first fragment cannot be + * unmapped until we have reached the end of packet descriptor for a buffer + * chain. + */ +static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; +} + +/** + * ixgbe_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct net_device *netdev = rx_ring->netdev; + + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbe_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && + !(netdev->features & NETIF_F_RXALL))) { + dev_kfree_skb_any(skb); + return true; + } + + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ixgbe_pull_tail(rx_ring, skb); - skb->protocol = eth_type_trans(skb, rx_ring->netdev); #ifdef IXGBE_FCOE - /* if ddp, not passing to ULD unless for FCP_RSP or error */ - if (ixgbe_rx_is_fcoe(adapter, rx_desc)) { - ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb, - staterr); - if (!ddp_bytes) { - dev_kfree_skb_any(skb); - goto next_desc; - } + /* do not attempt to pad FCoE Frames as this will disrupt DDP */ + if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) + return false; + +#endif + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/** + * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *old_buff) +{ + struct ixgbe_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + new_buff->page = old_buff->page; + new_buff->dma = old_buff->dma; + new_buff->page_offset = old_buff->page_offset; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, + new_buff->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); +} + +/** + * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + **/ +static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, + struct ixgbe_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_bufsz(rx_ring); +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - + ixgbe_rx_bufsz(rx_ring); +#endif + + if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + struct ixgbe_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + return NULL; } -#endif /* IXGBE_FCOE */ - ixgbe_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc); - budget--; -next_desc: - rx_desc->wb.upper.status_error = 0; + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); - if (!budget) - break; + /* + * Delay unmapping of the first packet. It carries the + * header information, HW may still access the header + * after the writeback. Only unmap it when EOP is + * reached + */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + goto dma_sync; + + IXGBE_CB(skb)->dma = rx_buffer->dma; + } else { + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + ixgbe_dma_sync_frag(rx_ring, skb); + +dma_sync: + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + + /* pull page into skb */ + if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; +} + +/** + * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf + * @q_vector: structure containing interrupt and ring information + * @rx_ring: rx descriptor ring to transact packets on + * @budget: Total limit on number of packets to process + * + * This function provides a "bounce buffer" approach to Rx interrupt + * processing. The advantage to this is that on systems that have + * expensive overhead for IOMMU access this provides a means of avoiding + * it by maintaining the mapping of the page to the syste. + * + * Returns amount of work completed + **/ +static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, + struct ixgbe_ring *rx_ring, + const int budget) +{ + unsigned int total_rx_bytes = 0, total_rx_packets = 0; +#ifdef IXGBE_FCOE + struct ixgbe_adapter *adapter = q_vector->adapter; + int ddp_bytes; + unsigned int mss = 0; +#endif /* IXGBE_FCOE */ + u16 cleaned_count = ixgbe_desc_unused(rx_ring); + + while (likely(total_rx_packets < budget)) { + union ixgbe_adv_rx_desc *rx_desc; + struct sk_buff *skb; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -1433,31 +2123,72 @@ next_desc: cleaned_count = 0; } - /* use prefetched values */ - rx_desc = next_rxd; - staterr = le32_to_cpu(rx_desc->wb.upper.status_error); - } + rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); - rx_ring->next_to_clean = i; - cleaned_count = ixgbe_desc_unused(rx_ring); + if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) + break; - if (cleaned_count) - ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + /* retrieve a buffer from the ring */ + skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + + cleaned_count++; + + /* place incomplete frames back on ring for completion */ + if (ixgbe_is_non_eop(rx_ring, rx_desc, skb)) + continue; + + /* verify the packet layout is correct */ + if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb)) + continue; + + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + ixgbe_process_skb_fields(rx_ring, rx_desc, skb); #ifdef IXGBE_FCOE - /* include DDPed FCoE data */ - if (ddp_bytes > 0) { - unsigned int mss; - - mss = rx_ring->netdev->mtu - sizeof(struct fcoe_hdr) - - sizeof(struct fc_frame_header) - - sizeof(struct fcoe_crc_eof); - if (mss > 512) - mss &= ~511; - total_rx_bytes += ddp_bytes; - total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } + /* if ddp, not passing to ULD unless for FCP_RSP or error */ + if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) { + ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); + /* include DDPed FCoE data */ + if (ddp_bytes > 0) { + if (!mss) { + mss = rx_ring->netdev->mtu - + sizeof(struct fcoe_hdr) - + sizeof(struct fc_frame_header) - + sizeof(struct fcoe_crc_eof); + if (mss > 512) + mss &= ~511; + } + total_rx_bytes += ddp_bytes; + total_rx_packets += DIV_ROUND_UP(ddp_bytes, + mss); + } + if (!ddp_bytes) { + dev_kfree_skb_any(skb); + continue; + } + } + #endif /* IXGBE_FCOE */ + skb_mark_napi_id(skb, &q_vector->napi); + ixgbe_rx_skb(q_vector, skb); + + /* update budget accounting */ + total_rx_packets++; + } u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -1466,9 +2197,46 @@ next_desc: q_vector->rx.total_packets += total_rx_packets; q_vector->rx.total_bytes += total_rx_bytes; - return !!budget; + if (cleaned_count) + ixgbe_alloc_rx_buffers(rx_ring, cleaned_count); + + return total_rx_packets; } +#ifdef CONFIG_NET_RX_BUSY_POLL +/* must be called with local_bh_disable()d */ +static int ixgbe_low_latency_recv(struct napi_struct *napi) +{ + struct ixgbe_q_vector *q_vector = + container_of(napi, struct ixgbe_q_vector, napi); + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *ring; + int found = 0; + + if (test_bit(__IXGBE_DOWN, &adapter->state)) + return LL_FLUSH_FAILED; + + if (!ixgbe_qv_lock_poll(q_vector)) + return LL_FLUSH_BUSY; + + ixgbe_for_each_ring(ring, q_vector->rx) { + found = ixgbe_clean_rx_irq(q_vector, ring, 4); +#ifdef BP_EXTENDED_STATS + if (found) + ring->stats.cleaned += found; + else + ring->stats.misses++; +#endif + if (found) + break; + } + + ixgbe_qv_unlock_poll(q_vector); + + return found; +} +#endif /* CONFIG_NET_RX_BUSY_POLL */ + /** * ixgbe_configure_msix - Configure MSI-X hardware * @adapter: board private structure @@ -1479,11 +2247,9 @@ next_desc: static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) { struct ixgbe_q_vector *q_vector; - int q_vectors, v_idx; + int v_idx; u32 mask; - q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - /* Populate MSIX to EITR Select */ if (adapter->num_vfs > 32) { u32 eitrsel = (1 << (adapter->num_vfs - 32)) - 1; @@ -1494,30 +2260,16 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) * Populate the IVAR table and set the ITR values to the * corresponding register. */ - for (v_idx = 0; v_idx < q_vectors; v_idx++) { + for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { struct ixgbe_ring *ring; q_vector = adapter->q_vector[v_idx]; - for (ring = q_vector->rx.ring; ring != NULL; ring = ring->next) + ixgbe_for_each_ring(ring, q_vector->rx) ixgbe_set_ivar(adapter, 0, ring->reg_idx, v_idx); - for (ring = q_vector->tx.ring; ring != NULL; ring = ring->next) + ixgbe_for_each_ring(ring, q_vector->tx) ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx); - if (q_vector->tx.ring && !q_vector->rx.ring) { - /* tx only vector */ - if (adapter->tx_itr_setting == 1) - q_vector->itr = IXGBE_10K_ITR; - else - q_vector->itr = adapter->tx_itr_setting; - } else { - /* rx or rx/tx vector */ - if (adapter->rx_itr_setting == 1) - q_vector->itr = IXGBE_20K_ITR; - else - q_vector->itr = adapter->rx_itr_setting; - } - ixgbe_write_eitr(q_vector); } @@ -1569,38 +2321,40 @@ enum latency_range { static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, struct ixgbe_ring_container *ring_container) { - u64 bytes_perint; - struct ixgbe_adapter *adapter = q_vector->adapter; int bytes = ring_container->total_bytes; int packets = ring_container->total_packets; u32 timepassed_us; + u64 bytes_perint; u8 itr_setting = ring_container->itr; if (packets == 0) return; /* simple throttlerate management - * 0-20MB/s lowest (100000 ints/s) - * 20-100MB/s low (20000 ints/s) - * 100-1249MB/s bulk (8000 ints/s) + * 0-10MB/s lowest (100000 ints/s) + * 10-20MB/s low (20000 ints/s) + * 20-1249MB/s bulk (8000 ints/s) */ /* what was last interrupt timeslice? */ timepassed_us = q_vector->itr >> 2; + if (timepassed_us == 0) + return; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ switch (itr_setting) { case lowest_latency: - if (bytes_perint > adapter->eitr_low) + if (bytes_perint > 10) itr_setting = low_latency; break; case low_latency: - if (bytes_perint > adapter->eitr_high) + if (bytes_perint > 20) itr_setting = bulk_latency; - else if (bytes_perint <= adapter->eitr_low) + else if (bytes_perint <= 10) itr_setting = lowest_latency; break; case bulk_latency: - if (bytes_perint <= adapter->eitr_high) + if (bytes_perint <= 20) itr_setting = low_latency; break; } @@ -1626,7 +2380,7 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_hw *hw = &adapter->hw; int v_idx = q_vector->v_idx; - u32 itr_reg = q_vector->itr; + u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: @@ -1678,14 +2432,14 @@ static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) ((9 * new_itr) + q_vector->itr); /* save the algorithm value here */ - q_vector->itr = new_itr & IXGBE_MAX_EITR; + q_vector->itr = new_itr; ixgbe_write_eitr(q_vector); } } /** - * ixgbe_check_overtemp_subtask - check for over tempurature + * ixgbe_check_overtemp_subtask - check for over temperature * @adapter: pointer to adapter **/ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) @@ -1716,10 +2470,10 @@ static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter) return; if (!(eicr & IXGBE_EICR_LSC) && hw->mac.ops.check_link) { - u32 autoneg; + u32 speed; bool link_up = false; - hw->mac.ops.check_link(hw, &autoneg, &link_up, false); + hw->mac.ops.check_link(hw, &speed, &link_up, false); if (link_up) return; @@ -1914,6 +2668,10 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues, default: break; } + + if (adapter->hw.mac.type == ixgbe_mac_X540) + mask |= IXGBE_EIMS_TIMESYNC; + if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) && !(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT)) mask |= IXGBE_EIMS_FLOW_DIR; @@ -1938,6 +2696,16 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) * with the write to EICR. */ eicr = IXGBE_READ_REG(hw, IXGBE_EICS); + + /* The lower 16bits of the EICR register are for the queue interrupts + * which should be masked here in order to not accidently clear them if + * the bits are high when ixgbe_msix_other is called. There is a race + * condition otherwise which results in possible performance loss + * especially if the ixgbe_msix_other interrupt is triggering + * consistently (as it would when PPS is turned on for the X540 device) + */ + eicr &= 0xFFFF0000; + IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); if (eicr & IXGBE_EICR_LSC) @@ -1949,9 +2717,12 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: - if (eicr & IXGBE_EICR_ECC) - e_info(link, "Received unrecoverable ECC Err, please " - "reboot\n"); + if (eicr & IXGBE_EICR_ECC) { + e_info(link, "Received ECC Err, initiating reset\n"); + adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + ixgbe_service_event_schedule(adapter); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } /* Handle Flow Director Full threshold interrupt */ if (eicr & IXGBE_EICR_FLOW_DIR) { int reinit_count = 0; @@ -1978,6 +2749,9 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) ixgbe_check_fan_failure(adapter, eicr); + if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) + ixgbe_ptp_check_pps_event(adapter, eicr); + /* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable(adapter, false, false); @@ -1997,76 +2771,57 @@ static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data) return IRQ_HANDLED; } -static inline void map_vector_to_rxq(struct ixgbe_adapter *a, int v_idx, - int r_idx) +/** + * ixgbe_poll - NAPI Rx polling callback + * @napi: structure for representing this polling device + * @budget: how many packets driver is allowed to clean + * + * This function is used for legacy and MSI, NAPI mode + **/ +int ixgbe_poll(struct napi_struct *napi, int budget) { - struct ixgbe_q_vector *q_vector = a->q_vector[v_idx]; - struct ixgbe_ring *rx_ring = a->rx_ring[r_idx]; + struct ixgbe_q_vector *q_vector = + container_of(napi, struct ixgbe_q_vector, napi); + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *ring; + int per_ring_budget; + bool clean_complete = true; - rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; - q_vector->rx.count++; -} +#ifdef CONFIG_IXGBE_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) + ixgbe_update_dca(q_vector); +#endif -static inline void map_vector_to_txq(struct ixgbe_adapter *a, int v_idx, - int t_idx) -{ - struct ixgbe_q_vector *q_vector = a->q_vector[v_idx]; - struct ixgbe_ring *tx_ring = a->tx_ring[t_idx]; + ixgbe_for_each_ring(ring, q_vector->tx) + clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); - tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; - q_vector->tx.count++; - q_vector->tx.work_limit = a->tx_work_limit; -} + if (!ixgbe_qv_lock_napi(q_vector)) + return budget; -/** - * ixgbe_map_rings_to_vectors - Maps descriptor rings to vectors - * @adapter: board private structure to initialize - * - * This function maps descriptor rings to the queue-specific vectors - * we were allotted through the MSI-X enabling code. Ideally, we'd have - * one vector per ring/queue, but on a constrained vector budget, we - * group the rings as "efficiently" as possible. You would add new - * mapping configurations in here. - **/ -static void ixgbe_map_rings_to_vectors(struct ixgbe_adapter *adapter) -{ - int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - int rxr_remaining = adapter->num_rx_queues, rxr_idx = 0; - int txr_remaining = adapter->num_tx_queues, txr_idx = 0; - int v_start = 0; + /* attempt to distribute budget to each queue fairly, but don't allow + * the budget to go below 1 because we'll exit polling */ + if (q_vector->rx.count > 1) + per_ring_budget = max(budget/q_vector->rx.count, 1); + else + per_ring_budget = budget; - /* only one q_vector if MSI-X is disabled. */ - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) - q_vectors = 1; + ixgbe_for_each_ring(ring, q_vector->rx) + clean_complete &= (ixgbe_clean_rx_irq(q_vector, ring, + per_ring_budget) < per_ring_budget); - /* - * If we don't have enough vectors for a 1-to-1 mapping, we'll have to - * group them so there are multiple queues per vector. - * - * Re-adjusting *qpv takes care of the remainder. - */ - for (; v_start < q_vectors && rxr_remaining; v_start++) { - int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_start); - for (; rqpv; rqpv--, rxr_idx++, rxr_remaining--) - map_vector_to_rxq(adapter, v_start, rxr_idx); - } + ixgbe_qv_unlock_napi(q_vector); + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; - /* - * If there are not enough q_vectors for each ring to have it's own - * vector then we must pair up Rx/Tx on a each vector - */ - if ((v_start + txr_remaining) > q_vectors) - v_start = 0; + /* all work done, exit the polling mode */ + napi_complete(napi); + if (adapter->rx_itr_setting & 1) + ixgbe_set_itr(q_vector); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) + ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx)); - for (; v_start < q_vectors && txr_remaining; v_start++) { - int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_start); - for (; tqpv; tqpv--, txr_idx++, txr_remaining--) - map_vector_to_txq(adapter, v_start, txr_idx); - } + return 0; } /** @@ -2079,11 +2834,10 @@ static void ixgbe_map_rings_to_vectors(struct ixgbe_adapter *adapter) static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; int vector, err; int ri = 0, ti = 0; - for (vector = 0; vector < q_vectors; vector++) { + for (vector = 0; vector < adapter->num_q_vectors; vector++) { struct ixgbe_q_vector *q_vector = adapter->q_vector[vector]; struct msix_entry *entry = &adapter->msix_entries[vector]; @@ -2112,14 +2866,14 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { /* assign the mask for this irq */ irq_set_affinity_hint(entry->vector, - q_vector->affinity_mask); + &q_vector->affinity_mask); } } err = request_irq(adapter->msix_entries[vector].vector, ixgbe_msix_other, 0, netdev->name, adapter); if (err) { - e_err(probe, "request_irq for msix_lsc failed: %d\n", err); + e_err(probe, "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; } @@ -2153,7 +2907,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) u32 eicr; /* - * Workaround for silicon errata on 82598. Mask the interrupts + * Workaround for silicon errata #26 on 82598. Mask the interrupt * before the read of EICR. */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); @@ -2182,9 +2936,12 @@ static irqreturn_t ixgbe_intr(int irq, void *data) ixgbe_check_sfp_event(adapter, eicr); /* Fall through */ case ixgbe_mac_X540: - if (eicr & IXGBE_EICR_ECC) - e_info(link, "Received unrecoverable ECC err, please " - "reboot\n"); + if (eicr & IXGBE_EICR_ECC) { + e_info(link, "Received ECC Err, initiating reset\n"); + adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + ixgbe_service_event_schedule(adapter); + IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); + } ixgbe_check_overtemp_event(adapter, eicr); break; default: @@ -2192,48 +2949,22 @@ static irqreturn_t ixgbe_intr(int irq, void *data) } ixgbe_check_fan_failure(adapter, eicr); + if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) + ixgbe_ptp_check_pps_event(adapter, eicr); - if (napi_schedule_prep(&(q_vector->napi))) { - /* would disable interrupts here but EIAM disabled it */ - __napi_schedule(&(q_vector->napi)); - } + /* would disable interrupts here but EIAM disabled it */ + napi_schedule(&q_vector->napi); /* * re-enable link(maybe) and non-queue interrupts, no flush. * ixgbe_poll will re-enable the queue interrupts */ - if (!test_bit(__IXGBE_DOWN, &adapter->state)) ixgbe_irq_enable(adapter, false, false); return IRQ_HANDLED; } -static inline void ixgbe_reset_q_vectors(struct ixgbe_adapter *adapter) -{ - int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - int i; - - /* legacy and MSI only use one vector */ - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) - q_vectors = 1; - - for (i = 0; i < adapter->num_rx_queues; i++) { - adapter->rx_ring[i]->q_vector = NULL; - adapter->rx_ring[i]->next = NULL; - } - for (i = 0; i < adapter->num_tx_queues; i++) { - adapter->tx_ring[i]->q_vector = NULL; - adapter->tx_ring[i]->next = NULL; - } - - for (i = 0; i < q_vectors; i++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; - memset(&q_vector->rx, 0, sizeof(struct ixgbe_ring_container)); - memset(&q_vector->tx, 0, sizeof(struct ixgbe_ring_container)); - } -} - /** * ixgbe_request_irq - initialize interrupts * @adapter: board private structure @@ -2246,9 +2977,6 @@ static int ixgbe_request_irq(struct ixgbe_adapter *adapter) struct net_device *netdev = adapter->netdev; int err; - /* map all of the rings to the q_vectors */ - ixgbe_map_rings_to_vectors(adapter); - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) err = ixgbe_request_msix_irqs(adapter); else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) @@ -2258,45 +2986,36 @@ static int ixgbe_request_irq(struct ixgbe_adapter *adapter) err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED, netdev->name, adapter); - if (err) { + if (err) e_err(probe, "request_irq failed, Error %d\n", err); - /* place q_vectors and rings back into a known good state */ - ixgbe_reset_q_vectors(adapter); - } - return err; } static void ixgbe_free_irq(struct ixgbe_adapter *adapter) { - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - int i, q_vectors; + int vector; - q_vectors = adapter->num_msix_vectors; - i = q_vectors - 1; - free_irq(adapter->msix_entries[i].vector, adapter); - i--; + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { + free_irq(adapter->pdev->irq, adapter); + return; + } - for (; i >= 0; i--) { - /* free only the irqs that were actually requested */ - if (!adapter->q_vector[i]->rx.ring && - !adapter->q_vector[i]->tx.ring) - continue; + for (vector = 0; vector < adapter->num_q_vectors; vector++) { + struct ixgbe_q_vector *q_vector = adapter->q_vector[vector]; + struct msix_entry *entry = &adapter->msix_entries[vector]; - /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(adapter->msix_entries[i].vector, - NULL); + /* free only the irqs that were actually requested */ + if (!q_vector->rx.ring && !q_vector->tx.ring) + continue; - free_irq(adapter->msix_entries[i].vector, - adapter->q_vector[i]); - } - } else { - free_irq(adapter->pdev->irq, adapter); + /* clear the affinity_mask in the IRQ descriptor */ + irq_set_affinity_hint(entry->vector, NULL); + + free_irq(entry->vector, q_vector); } - /* clear q_vector state information */ - ixgbe_reset_q_vectors(adapter); + free_irq(adapter->msix_entries[vector++].vector, adapter); } /** @@ -2320,9 +3039,12 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) } IXGBE_WRITE_FLUSH(&adapter->hw); if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - int i; - for (i = 0; i < adapter->num_msix_vectors; i++) - synchronize_irq(adapter->msix_entries[i].vector); + int vector; + + for (vector = 0; vector < adapter->num_q_vectors; vector++) + synchronize_irq(adapter->msix_entries[vector].vector); + + synchronize_irq(adapter->msix_entries[vector++].vector); } else { synchronize_irq(adapter->pdev->irq); } @@ -2336,12 +3058,6 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) { struct ixgbe_q_vector *q_vector = adapter->q_vector[0]; - /* rx/tx vector */ - if (adapter->rx_itr_setting == 1) - q_vector->itr = IXGBE_20K_ITR; - else - q_vector->itr = adapter->rx_itr_setting; - ixgbe_write_eitr(q_vector); ixgbe_set_ivar(adapter, 0, 0, 0); @@ -2377,28 +3093,36 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, ring->count * sizeof(union ixgbe_adv_tx_desc)); IXGBE_WRITE_REG(hw, IXGBE_TDH(reg_idx), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(reg_idx), 0); - ring->tail = hw->hw_addr + IXGBE_TDT(reg_idx); + ring->tail = adapter->io_addr + IXGBE_TDT(reg_idx); /* * set WTHRESH to encourage burst writeback, it should not be set - * higher than 1 when ITR is 0 as it could cause false TX hangs + * higher than 1 when: + * - ITR is 0 as it could cause false TX hangs + * - ITR is set to > 100k int/sec and BQL is enabled * * In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is * currently 40. */ - if (!adapter->tx_itr_setting || !adapter->rx_itr_setting) +#if IS_ENABLED(CONFIG_BQL) + if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR)) +#else + if (!ring->q_vector || (ring->q_vector->itr < 8)) +#endif txdctl |= (1 << 16); /* WTHRESH = 1 */ else txdctl |= (8 << 16); /* WTHRESH = 8 */ - /* PTHRESH=32 is needed to avoid a Tx hang with DFP enabled. */ + /* + * Setting PTHRESH to 32 both improves performance + * and avoids a TX hang with DFP enabled + */ txdctl |= (1 << 8) | /* HTHRESH = 1 */ 32; /* PTHRESH = 32 */ /* reinitialize flowdirector state */ - if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) && - adapter->atr_sample_rate) { + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { ring->atr_sample_rate = adapter->atr_sample_rate; ring->atr_count = 0; set_bit(__IXGBE_TX_FDIR_INIT_DONE, &ring->state); @@ -2406,6 +3130,16 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, ring->atr_sample_rate = 0; } + /* initialize XPS */ + if (!test_and_set_bit(__IXGBE_TX_XPS_INIT_DONE, &ring->state)) { + struct ixgbe_q_vector *q_vector = ring->q_vector; + + if (q_vector) + netif_set_xps_queue(ring->netdev, + &q_vector->affinity_mask, + ring->queue_index); + } + clear_bit(__IXGBE_HANG_CHECK_ARMED, &ring->state); /* enable queue */ @@ -2428,8 +3162,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 rttdcs; - u32 reg; + u32 rttdcs, mtqc; u8 tcs = netdev_get_num_tc(adapter->netdev); if (hw->mac.type == ixgbe_mac_82598EB) @@ -2441,28 +3174,32 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); /* set transmit pool layout */ - switch (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - case (IXGBE_FLAG_SRIOV_ENABLED): - IXGBE_WRITE_REG(hw, IXGBE_MTQC, - (IXGBE_MTQC_VT_ENA | IXGBE_MTQC_64VF)); - break; - default: - if (!tcs) - reg = IXGBE_MTQC_64Q_1PB; - else if (tcs <= 4) - reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + mtqc = IXGBE_MTQC_VT_ENA; + if (tcs > 4) + mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; + else if (tcs > 1) + mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; + else if (adapter->ring_feature[RING_F_RSS].indices == 4) + mtqc |= IXGBE_MTQC_32VF; + else + mtqc |= IXGBE_MTQC_64VF; + } else { + if (tcs > 4) + mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; + else if (tcs > 1) + mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; else - reg = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; + mtqc = IXGBE_MTQC_64Q_1PB; + } - IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg); + IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); - /* Enable Security TX Buffer IFG for multiple pb */ - if (tcs) { - reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); - reg |= IXGBE_SECTX_DCB; - IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg); - } - break; + /* Enable Security TX Buffer IFG for multiple pb */ + if (tcs) { + u32 sectx = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); + sectx |= IXGBE_SECTX_DCB; + IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, sectx); } /* re-enable the arbiter */ @@ -2496,51 +3233,90 @@ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter) ixgbe_configure_tx_ring(adapter, adapter->tx_ring[i]); } +static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter, + struct ixgbe_ring *ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u8 reg_idx = ring->reg_idx; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx)); + + srrctl |= IXGBE_SRRCTL_DROP_EN; + + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl); +} + +static void ixgbe_disable_rx_drop(struct ixgbe_adapter *adapter, + struct ixgbe_ring *ring) +{ + struct ixgbe_hw *hw = &adapter->hw; + u8 reg_idx = ring->reg_idx; + u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(reg_idx)); + + srrctl &= ~IXGBE_SRRCTL_DROP_EN; + + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl); +} + +#ifdef CONFIG_IXGBE_DCB +void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) +#else +static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) +#endif +{ + int i; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + + /* + * We should set the drop enable bit if: + * SR-IOV is enabled + * or + * Number of Rx queues > 1 and flow control is disabled + * + * This allows us to avoid head of line blocking for security + * and performance reasons. + */ + if (adapter->num_vfs || (adapter->num_rx_queues > 1 && + !(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) { + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]); + } else { + for (i = 0; i < adapter->num_rx_queues; i++) + ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]); + } +} + #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, struct ixgbe_ring *rx_ring) { + struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; u8 reg_idx = rx_ring->reg_idx; - switch (adapter->hw.mac.type) { - case ixgbe_mac_82598EB: { - struct ixgbe_ring_feature *feature = adapter->ring_feature; - const int mask = feature[RING_F_RSS].mask; - reg_idx = reg_idx & mask; - } - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - default: - break; - } + if (hw->mac.type == ixgbe_mac_82598EB) { + u16 mask = adapter->ring_feature[RING_F_RSS].mask; - srrctl = IXGBE_READ_REG(&adapter->hw, IXGBE_SRRCTL(reg_idx)); + /* + * if VMDq is not active we must program one srrctl register + * per RSS queue since we have enabled RDRXCTL.MVMEN + */ + reg_idx &= mask; + } - srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; - srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; - if (adapter->num_vfs) - srrctl |= IXGBE_SRRCTL_DROP_EN; + /* configure header buffer length, needed for RSC */ + srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= (IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT) & - IXGBE_SRRCTL_BSIZEHDR_MASK; + /* configure the packet buffer length */ + srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - if (ring_is_ps_enabled(rx_ring)) { -#if (PAGE_SIZE / 2) > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#else - srrctl |= (PAGE_SIZE / 2) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#endif - srrctl |= IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; - } else { - srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - } + /* configure descriptor type */ + srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - IXGBE_WRITE_REG(&adapter->hw, IXGBE_SRRCTL(reg_idx), srrctl); + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(reg_idx), srrctl); } static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) @@ -2552,11 +3328,15 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) u32 mrqc = 0, reta = 0; u32 rxcsum; int i, j; - u8 tcs = netdev_get_num_tc(adapter->netdev); - int maxq = adapter->ring_feature[RING_F_RSS].indices; + u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - if (tcs) - maxq = min(maxq, adapter->num_tx_queues / tcs); + /* + * Program table for at least 2 queues w/ SR-IOV so that VFs can + * make full use of any rings they may have. We will use the + * PSRTYPE register to control how many rings we use within the PF. + */ + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2)) + rss_i = 2; /* Fill out hash function seeds */ for (i = 0; i < 10; i++) @@ -2564,7 +3344,7 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) /* Fill out redirection table */ for (i = 0, j = 0; i < 128; i++, j++) { - if (j == maxq) + if (j == rss_i) j = 0; /* reta = 4-byte sliding window of * 0x00..(indices-1)(indices-1)00..etc. */ @@ -2578,35 +3358,41 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) rxcsum |= IXGBE_RXCSUM_PCSD; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); - if (adapter->hw.mac.type == ixgbe_mac_82598EB && - (adapter->flags & IXGBE_FLAG_RSS_ENABLED)) { - mrqc = IXGBE_MRQC_RSSEN; + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + if (adapter->ring_feature[RING_F_RSS].mask) + mrqc = IXGBE_MRQC_RSSEN; } else { - int mask = adapter->flags & (IXGBE_FLAG_RSS_ENABLED - | IXGBE_FLAG_SRIOV_ENABLED); - - switch (mask) { - case (IXGBE_FLAG_RSS_ENABLED): - if (!tcs) - mrqc = IXGBE_MRQC_RSSEN; - else if (tcs <= 4) - mrqc = IXGBE_MRQC_RTRSS4TCEN; + u8 tcs = netdev_get_num_tc(adapter->netdev); + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + if (tcs > 4) + mrqc = IXGBE_MRQC_VMDQRT8TCEN; /* 8 TCs */ + else if (tcs > 1) + mrqc = IXGBE_MRQC_VMDQRT4TCEN; /* 4 TCs */ + else if (adapter->ring_feature[RING_F_RSS].indices == 4) + mrqc = IXGBE_MRQC_VMDQRSS32EN; else + mrqc = IXGBE_MRQC_VMDQRSS64EN; + } else { + if (tcs > 4) mrqc = IXGBE_MRQC_RTRSS8TCEN; - break; - case (IXGBE_FLAG_SRIOV_ENABLED): - mrqc = IXGBE_MRQC_VMDQEN; - break; - default: - break; + else if (tcs > 1) + mrqc = IXGBE_MRQC_RTRSS4TCEN; + else + mrqc = IXGBE_MRQC_RSSEN; } } /* Perform hash on these packet types */ - mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4 - | IXGBE_MRQC_RSS_FIELD_IPV4_TCP - | IXGBE_MRQC_RSS_FIELD_IPV6 - | IXGBE_MRQC_RSS_FIELD_IPV6_TCP; + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4 | + IXGBE_MRQC_RSS_FIELD_IPV4_TCP | + IXGBE_MRQC_RSS_FIELD_IPV6 | + IXGBE_MRQC_RSS_FIELD_IPV6_TCP; + + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); } @@ -2621,13 +3407,11 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, { struct ixgbe_hw *hw = &adapter->hw; u32 rscctrl; - int rx_buf_len; u8 reg_idx = ring->reg_idx; if (!ring_is_rsc_enabled(ring)) return; - rx_buf_len = ring->rx_buf_len; rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx)); rscctrl |= IXGBE_RSCCTL_RSCEN; /* @@ -2635,54 +3419,10 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ - if (ring_is_ps_enabled(ring)) { -#if (PAGE_SIZE < 8192) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE < 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#elif (PAGE_SIZE < 32768) - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_1; -#endif - } else { - if (rx_buf_len <= IXGBE_RXBUFFER_4K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_16; - else if (rx_buf_len <= IXGBE_RXBUFFER_8K) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; - else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; - } + rscctrl |= IXGBE_RSCCTL_MAXDESC_16; IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } -/** - * ixgbe_set_uta - Set unicast filter table address - * @adapter: board private structure - * - * The unicast table address is a register array of 32-bit registers. - * The table is meant to be used in a way similar to how the MTA is used - * however due to certain limitations in the hardware it is necessary to - * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous - * enable bit to allow vlan tag stripping when promiscuous mode is enabled - **/ -static void ixgbe_set_uta(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int i; - - /* The UTA table only exists on 82599 hardware and newer */ - if (hw->mac.type < ixgbe_mac_82599EB) - return; - - /* we only need to do this if VMDq is enabled */ - if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) - return; - - for (i = 0; i < 128; i++) - IXGBE_WRITE_REG(hw, IXGBE_UTA(i), ~0); -} - #define IXGBE_MAX_RX_DESC_POLL 10 static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter, struct ixgbe_ring *ring) @@ -2692,6 +3432,8 @@ static void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter, u32 rxdctl; u8 reg_idx = ring->reg_idx; + if (ixgbe_removed(hw->hw_addr)) + return; /* RXDCTL.EN will return 0 on 82598 if link is down, so skip it */ if (hw->mac.type == ixgbe_mac_82598EB && !(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) @@ -2716,6 +3458,8 @@ void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter, u32 rxdctl; u8 reg_idx = ring->reg_idx; + if (ixgbe_removed(hw->hw_addr)) + return; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); rxdctl &= ~IXGBE_RXDCTL_ENABLE; @@ -2756,19 +3500,11 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, ring->count * sizeof(union ixgbe_adv_rx_desc)); IXGBE_WRITE_REG(hw, IXGBE_RDH(reg_idx), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(reg_idx), 0); - ring->tail = hw->hw_addr + IXGBE_RDT(reg_idx); + ring->tail = adapter->io_addr + IXGBE_RDT(reg_idx); ixgbe_configure_srrctl(adapter, ring); ixgbe_configure_rscctl(adapter, ring); - /* If operating in IOV mode set RLPML for X540 */ - if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && - hw->mac.type == ixgbe_mac_X540) { - rxdctl &= ~IXGBE_RXDCTL_RLPMLMASK; - rxdctl |= ((ring->netdev->mtu + ETH_HLEN + - ETH_FCS_LEN + VLAN_HLEN) | IXGBE_RXDCTL_RLPML_EN); - } - if (hw->mac.type == ixgbe_mac_82598EB) { /* * enable cache line friendly hardware writes: @@ -2792,7 +3528,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter, static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - int p; + int rss_i = adapter->ring_feature[RING_F_RSS].indices; + u16 pool; /* PSRTYPE must be initialized in non 82598 adapters */ u32 psrtype = IXGBE_PSRTYPE_TCPHDR | @@ -2804,58 +3541,67 @@ static void ixgbe_setup_psrtype(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) return; - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) - psrtype |= (adapter->num_rx_queues_per_pool << 29); + if (rss_i > 3) + psrtype |= 2 << 29; + else if (rss_i > 1) + psrtype |= 1 << 29; - for (p = 0; p < adapter->num_rx_pools; p++) - IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(adapter->num_vfs + p), - psrtype); + for_each_set_bit(pool, &adapter->fwd_bitmask, 32) + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); } static void ixgbe_configure_virtualization(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 gcr_ext; - u32 vt_reg_bits; u32 reg_offset, vf_shift; - u32 vmdctl; + u32 gcr_ext, vmdctl; int i; if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return; vmdctl = IXGBE_READ_REG(hw, IXGBE_VT_CTL); - vt_reg_bits = IXGBE_VMD_CTL_VMDQ_EN | IXGBE_VT_CTL_REPLEN; - vt_reg_bits |= (adapter->num_vfs << IXGBE_VT_CTL_POOL_SHIFT); - IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl | vt_reg_bits); + vmdctl |= IXGBE_VMD_CTL_VMDQ_EN; + vmdctl &= ~IXGBE_VT_CTL_POOL_MASK; + vmdctl |= VMDQ_P(0) << IXGBE_VT_CTL_POOL_SHIFT; + vmdctl |= IXGBE_VT_CTL_REPLEN; + IXGBE_WRITE_REG(hw, IXGBE_VT_CTL, vmdctl); - vf_shift = adapter->num_vfs % 32; - reg_offset = (adapter->num_vfs >= 32) ? 1 : 0; + vf_shift = VMDQ_P(0) % 32; + reg_offset = (VMDQ_P(0) >= 32) ? 1 : 0; /* Enable only the PF's pool for Tx/Rx */ - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), (1 << vf_shift)); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), (1 << vf_shift)); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), 0); - IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), (~0) << vf_shift); + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), (~0) << vf_shift); + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1); + if (adapter->flags2 & IXGBE_FLAG2_BRIDGE_MODE_VEB) + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); /* Map PF MAC address in RAR Entry 0 to first pool following VFs */ - hw->mac.ops.set_vmdq(hw, 0, adapter->num_vfs); + hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0)); /* * Set up VF register offsets for selected VT Mode, * i.e. 32 or 64 VFs for SR-IOV */ - gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); - gcr_ext |= IXGBE_GCR_EXT_MSIX_EN; - gcr_ext |= IXGBE_GCR_EXT_VT_MODE_64; + switch (adapter->ring_feature[RING_F_VMDQ].mask) { + case IXGBE_82599_VMDQ_8Q_MASK: + gcr_ext = IXGBE_GCR_EXT_VT_MODE_16; + break; + case IXGBE_82599_VMDQ_4Q_MASK: + gcr_ext = IXGBE_GCR_EXT_VT_MODE_32; + break; + default: + gcr_ext = IXGBE_GCR_EXT_VT_MODE_64; + break; + } + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); - /* enable Tx loopback for VF/PF communication */ - IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN); + /* Enable MAC Anti-Spoofing */ - hw->mac.ops.set_mac_anti_spoofing(hw, - (adapter->num_vfs != 0), + hw->mac.ops.set_mac_anti_spoofing(hw, (adapter->num_vfs != 0), adapter->num_vfs); /* For VFs that have spoof checking turned off */ for (i = 0; i < adapter->num_vfs; i++) { @@ -2869,23 +3615,10 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - int rx_buf_len; struct ixgbe_ring *rx_ring; int i; u32 mhadd, hlreg0; - /* Decide whether to use packet split mode or not */ - /* On by default */ - adapter->flags |= IXGBE_FLAG_RX_PS_ENABLED; - - /* Do not use packet split if we're in SR-IOV Mode */ - if (adapter->num_vfs) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - - /* Disable packet split due to 82599 erratum #45 */ - if (hw->mac.type == ixgbe_mac_82599EB) - adapter->flags &= ~IXGBE_FLAG_RX_PS_ENABLED; - #ifdef IXGBE_FCOE /* adjust max frame to be able to do baby jumbo for FCoE */ if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) && @@ -2893,6 +3626,11 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif /* IXGBE_FCOE */ + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN); + mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); if (max_frame != (mhadd >> IXGBE_MHADD_MFS_SHIFT)) { mhadd &= ~IXGBE_MHADD_MFS_MASK; @@ -2901,30 +3639,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } - /* MHADD will allow an extra 4 bytes past for vlan tagged frames */ - max_frame += VLAN_HLEN; - - /* Set the RX buffer length according to the mode */ - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) { - rx_buf_len = IXGBE_RX_HDR_SIZE; - } else { - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && - (netdev->mtu <= ETH_DATA_LEN)) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - /* - * Make best use of allocation by using all but 1K of a - * power of 2 allocation that will be used for skb->head. - */ - else if (max_frame <= IXGBE_RXBUFFER_3K) - rx_buf_len = IXGBE_RXBUFFER_3K; - else if (max_frame <= IXGBE_RXBUFFER_7K) - rx_buf_len = IXGBE_RXBUFFER_7K; - else if (max_frame <= IXGBE_RXBUFFER_15K) - rx_buf_len = IXGBE_RXBUFFER_15K; - else - rx_buf_len = IXGBE_MAX_RXBUFFER; - } - hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */ hlreg0 |= IXGBE_HLREG0_JUMBOEN; @@ -2936,34 +3650,10 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter) */ for (i = 0; i < adapter->num_rx_queues; i++) { rx_ring = adapter->rx_ring[i]; - rx_ring->rx_buf_len = rx_buf_len; - - if (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED) - set_ring_ps_enabled(rx_ring); - else - clear_ring_ps_enabled(rx_ring); - if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) set_ring_rsc_enabled(rx_ring); else clear_ring_rsc_enabled(rx_ring); - -#ifdef IXGBE_FCOE - if (netdev->features & NETIF_F_FCOE_MTU) { - struct ixgbe_ring_feature *f; - f = &adapter->ring_feature[RING_F_FCOE]; - if ((i >= f->mask) && (i < f->mask + f->indices)) { - clear_ring_ps_enabled(rx_ring); - if (rx_buf_len < IXGBE_FCOE_JUMBO_FRAME_SIZE) - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } else if (!ring_is_rsc_enabled(rx_ring) && - !ring_is_ps_enabled(rx_ring)) { - rx_ring->rx_buf_len = - IXGBE_FCOE_JUMBO_FRAME_SIZE; - } - } -#endif /* IXGBE_FCOE */ } } @@ -3014,7 +3704,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int i; - u32 rxctrl; + u32 rxctrl, rfctl; /* disable receives while setting up the descriptors */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); @@ -3023,11 +3713,16 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) ixgbe_setup_psrtype(adapter); ixgbe_setup_rdrxctl(adapter); + /* RSC Setup */ + rfctl = IXGBE_READ_REG(hw, IXGBE_RFCTL); + rfctl &= ~IXGBE_RFCTL_RSC_DIS; + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) + rfctl |= IXGBE_RFCTL_RSC_DIS; + IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl); + /* Program registers for the distribution of queues */ ixgbe_setup_mrqc(adapter); - ixgbe_set_uta(adapter); - /* set_rx_buffer_len must be called before ring initialization */ ixgbe_set_rx_buffer_len(adapter); @@ -3047,62 +3742,33 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) hw->mac.ops.enable_rx_dma(hw, rxctrl); } -static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int pool_ndx = adapter->num_vfs; /* add VID to filter table */ - hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, true); + hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), true); set_bit(vid, adapter->active_vlans); return 0; } -static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; - int pool_ndx = adapter->num_vfs; /* remove VID from filter table */ - hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, false); + hw->mac.ops.set_vfta(&adapter->hw, vid, VMDQ_P(0), false); clear_bit(vid, adapter->active_vlans); return 0; } /** - * ixgbe_vlan_filter_disable - helper to disable hw vlan filtering - * @adapter: driver data - */ -static void ixgbe_vlan_filter_disable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); -} - -/** - * ixgbe_vlan_filter_enable - helper to enable hw vlan filtering - * @adapter: driver data - */ -static void ixgbe_vlan_filter_enable(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - u32 vlnctrl; - - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl |= IXGBE_VLNCTRL_VFE; - vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); -} - -/** * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping * @adapter: driver data */ @@ -3121,7 +3787,11 @@ static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter) case ixgbe_mac_82599EB: case ixgbe_mac_X540: for (i = 0; i < adapter->num_rx_queues; i++) { - j = adapter->rx_ring[i]->reg_idx; + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + if (ring->l2_accel_priv) + continue; + j = ring->reg_idx; vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); vlnctrl &= ~IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl); @@ -3151,7 +3821,11 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter) case ixgbe_mac_82599EB: case ixgbe_mac_X540: for (i = 0; i < adapter->num_rx_queues; i++) { - j = adapter->rx_ring[i]->reg_idx; + struct ixgbe_ring *ring = adapter->rx_ring[i]; + + if (ring->l2_accel_priv) + continue; + j = ring->reg_idx; vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(j)); vlnctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(j), vlnctrl); @@ -3166,12 +3840,164 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) { u16 vid; - ixgbe_vlan_rx_add_vid(adapter->netdev, 0); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), 0); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - ixgbe_vlan_rx_add_vid(adapter->netdev, vid); + ixgbe_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); +} + +/** + * ixgbe_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +static int ixgbe_write_mc_addr_list(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + if (!netif_running(netdev)) + return 0; + + if (hw->mac.ops.update_mc_addr_list) + hw->mac.ops.update_mc_addr_list(hw, netdev); + else + return -ENOMEM; + +#ifdef CONFIG_PCI_IOV + ixgbe_restore_vf_multicasts(adapter); +#endif + + return netdev_mc_count(netdev); +} + +#ifdef CONFIG_PCI_IOV +void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); + + adapter->mac_table[i].state &= ~(IXGBE_MAC_STATE_MODIFIED); + } +} +#endif + +static void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { + if (adapter->mac_table[i].state & + IXGBE_MAC_STATE_IN_USE) + hw->mac.ops.set_rar(hw, i, + adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + else + hw->mac.ops.clear_rar(hw, i); + + adapter->mac_table[i].state &= + ~(IXGBE_MAC_STATE_MODIFIED); + } + } +} + +static void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) +{ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + } + ixgbe_sync_mac_table(adapter); +} + +static int ixgbe_available_rars(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +/* this function destroys the first RAR entry */ +static void ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter, + u8 *addr) +{ + struct ixgbe_hw *hw = &adapter->hw; + + memcpy(&adapter->mac_table[0].addr, addr, ETH_ALEN); + adapter->mac_table[0].queue = VMDQ_P(0); + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); +} + +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE); + ether_addr_copy(adapter->mac_table[i].addr, addr); + adapter->mac_table[i].queue = queue; + ixgbe_sync_mac_table(adapter); + return i; + } + return -ENOMEM; } +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return -EINVAL; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + ixgbe_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} /** * ixgbe_write_uc_addr_list - write unicast addresses to RAR table * @netdev: network interface device structure @@ -3181,36 +4007,23 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter) * 0 on no addresses written * X on writing X addresses to the RAR table **/ -static int ixgbe_write_uc_addr_list(struct net_device *netdev) +static int ixgbe_write_uc_addr_list(struct net_device *netdev, int vfn) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - unsigned int vfn = adapter->num_vfs; - unsigned int rar_entries = IXGBE_MAX_PF_MACVLANS; int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev_uc_count(netdev) > rar_entries) + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) return -ENOMEM; - if (!netdev_uc_empty(netdev) && rar_entries) { + if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha; - /* return error if we do not support writing to RAR table */ - if (!hw->mac.ops.set_rar) - return -ENOMEM; - netdev_for_each_uc_addr(ha, netdev) { - if (!rar_entries) - break; - hw->mac.ops.set_rar(hw, rar_entries--, ha->addr, - vfn, IXGBE_RAH_AV); + ixgbe_del_mac_filter(adapter, ha->addr, vfn); + ixgbe_add_mac_filter(adapter, ha->addr, vfn); count++; } } - /* write the addresses in reverse order to avoid write combining */ - for (; rar_entries > 0 ; rar_entries--) - hw->mac.ops.clear_rar(hw, rar_entries); - return count; } @@ -3228,64 +4041,88 @@ void ixgbe_set_rx_mode(struct net_device *netdev) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; + u32 vlnctrl; int count; /* Check for Promiscuous and All Multicast modes */ - fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* set all bits that we expect to always be set */ + fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */ fctrl |= IXGBE_FCTRL_BAM; fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ fctrl |= IXGBE_FCTRL_PMCF; /* clear the bits we are changing the status of */ fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - + vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); if (netdev->flags & IFF_PROMISC) { hw->addr_ctrl.user_set_promisc = true; fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); - vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_MPE); - /* don't hardware filter vlans in promisc mode */ - ixgbe_vlan_filter_disable(adapter); + vmolr |= IXGBE_VMOLR_MPE; + /* Only disable hardware filter vlans in promiscuous mode + * if SR-IOV and VMDQ are disabled - otherwise ensure + * that hardware VLAN filters remain enabled. + */ + if (!(adapter->flags & (IXGBE_FLAG_VMDQ_ENABLED | + IXGBE_FLAG_SRIOV_ENABLED))) + vlnctrl |= (IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); } else { if (netdev->flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; vmolr |= IXGBE_VMOLR_MPE; - } else { - /* - * Write addresses to the MTA, if the attempt fails - * then we should just turn on promiscuous mode so - * that we can at least receive multicast traffic - */ - hw->mac.ops.update_mc_addr_list(hw, netdev); - vmolr |= IXGBE_VMOLR_ROMPE; } - ixgbe_vlan_filter_enable(adapter); + vlnctrl |= IXGBE_VLNCTRL_VFE; hw->addr_ctrl.user_set_promisc = false; - /* - * Write addresses to available RAR registers, if there is not - * sufficient space to store all the addresses then enable - * unicast promiscuous mode - */ - count = ixgbe_write_uc_addr_list(netdev); - if (count < 0) { - fctrl |= IXGBE_FCTRL_UPE; - vmolr |= IXGBE_VMOLR_ROPE; - } } - if (adapter->num_vfs) { - ixgbe_restore_vf_multicasts(adapter); - vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) & + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = ixgbe_write_uc_addr_list(netdev, VMDQ_P(0)); + if (count < 0) { + fctrl |= IXGBE_FCTRL_UPE; + vmolr |= IXGBE_VMOLR_ROPE; + } + + /* Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = ixgbe_write_mc_addr_list(netdev); + if (count < 0) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else if (count) { + vmolr |= IXGBE_VMOLR_ROMPE; + } + + if (hw->mac.type != ixgbe_mac_82598EB) { + vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(VMDQ_P(0))) & ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_ROPE); - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(0)), vmolr); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode */ + fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */ + IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */ + IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */ + + fctrl &= ~(IXGBE_FCTRL_DPF); + /* NOTE: VLAN filtering is disabled by setting PROMISC */ + } + + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); - if (netdev->features & NETIF_F_HW_VLAN_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ixgbe_vlan_strip_enable(adapter); else ixgbe_vlan_strip_disable(adapter); @@ -3294,37 +4131,28 @@ void ixgbe_set_rx_mode(struct net_device *netdev) static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter) { int q_idx; - struct ixgbe_q_vector *q_vector; - int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - /* legacy and MSI only use one vector */ - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) - q_vectors = 1; - for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = adapter->q_vector[q_idx]; - napi_enable(&q_vector->napi); + for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) { + ixgbe_qv_init_lock(adapter->q_vector[q_idx]); + napi_enable(&adapter->q_vector[q_idx]->napi); } } static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) { int q_idx; - struct ixgbe_q_vector *q_vector; - int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - /* legacy and MSI only use one vector */ - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) - q_vectors = 1; - for (q_idx = 0; q_idx < q_vectors; q_idx++) { - q_vector = adapter->q_vector[q_idx]; - napi_disable(&q_vector->napi); + for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++) { + napi_disable(&adapter->q_vector[q_idx]->napi); + while (!ixgbe_qv_disable(adapter->q_vector[q_idx])) { + pr_info("QV %d locked\n", q_idx); + usleep_range(1000, 20000); + } } } #ifdef CONFIG_IXGBE_DCB -/* +/** * ixgbe_configure_dcb - Configure DCB hardware * @adapter: ixgbe adapter struct * @@ -3346,12 +4174,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) netif_set_gso_max_size(adapter->netdev, 32768); - - /* Enable VLAN tag insert/strip */ - adapter->netdev->features |= NETIF_F_HW_VLAN_RX; - - hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); - #ifdef IXGBE_FCOE if (adapter->netdev->features & NETIF_F_FCOE_MTU) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -3375,19 +4197,16 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) /* Enable RSS Hash per TC */ if (hw->mac.type != ixgbe_mac_82598EB) { - int i; - u32 reg = 0; - - for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { - u8 msb = 0; - u8 cnt = adapter->netdev->tc_to_txq[i].count; + u32 msb = 0; + u16 rss_i = adapter->ring_feature[RING_F_RSS].indices - 1; - while (cnt >>= 1) - msb++; - - reg |= msb << IXGBE_RQTC_SHIFT_TC(i); + while (rss_i) { + msb++; + rss_i >>= 1; } - IXGBE_WRITE_REG(hw, IXGBE_RQTC, reg); + + /* write msb to all 8 TCs in one write */ + IXGBE_WRITE_REG(hw, IXGBE_RQTC, msb * 0x11111111); } } #endif @@ -3395,11 +4214,11 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) /* Additional bittime to account for IXGBE framing */ #define IXGBE_ETH_FRAMING 20 -/* +/** * ixgbe_hpbthresh - calculate high water mark for flow control * * @adapter: board private structure to calculate for - * @pb - packet buffer to calculate + * @pb: packet buffer to calculate */ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) { @@ -3413,16 +4232,10 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) #ifdef IXGBE_FCOE /* FCoE traffic class uses FCOE jumbo frames */ - if (dev->features & NETIF_F_FCOE_MTU) { - int fcoe_pb = 0; - -#ifdef CONFIG_IXGBE_DCB - fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); - -#endif - if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) - tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; - } + if ((dev->features & NETIF_F_FCOE_MTU) && + (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + (pb == ixgbe_fcoe_get_tc(adapter))) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; #endif /* Calculate delay value for device */ @@ -3459,13 +4272,13 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) return marker; } -/* +/** * ixgbe_lpbthresh - calculate low water mark for for flow control * * @adapter: board private structure to calculate for - * @pb - packet buffer to calculate + * @pb: packet buffer to calculate */ -static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) { struct ixgbe_hw *hw = &adapter->hw; struct net_device *dev = adapter->netdev; @@ -3475,6 +4288,14 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) /* Calculate max LAN frame size */ tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if ((dev->features & NETIF_F_FCOE_MTU) && + (tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) && + (pb == netdev_get_prio_tc_map(dev, adapter->fcoe.up))) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; +#endif + /* Calculate delay value for device */ switch (hw->mac.type) { case ixgbe_mac_X540: @@ -3501,15 +4322,17 @@ static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) if (!num_tc) num_tc = 1; - hw->fc.low_water = ixgbe_lpbthresh(adapter); - for (i = 0; i < num_tc; i++) { hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i); /* Low water marks must not be larger than high water marks */ - if (hw->fc.low_water > hw->fc.high_water[i]) - hw->fc.low_water = 0; + if (hw->fc.low_water[i] > hw->fc.high_water[i]) + hw->fc.low_water[i] = 0; } + + for (; i < MAX_TRAFFIC_CLASS; i++) + hw->fc.high_water[i] = 0; } static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) @@ -3531,7 +4354,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); @@ -3539,7 +4362,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) if (!hlist_empty(&adapter->fdir_filter_list)) ixgbe_fdir_set_input_mask_82599(hw, &adapter->fdir_mask); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { ixgbe_fdir_write_perfect_filter_82599(hw, &filter->filter, @@ -3552,21 +4375,244 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) spin_unlock(&adapter->fdir_perfect_lock); } +static void ixgbe_macvlan_set_rx_mode(struct net_device *dev, unsigned int pool, + struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vmolr; + + /* No unicast promiscuous support for VMDQ devices. */ + vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool)); + vmolr |= (IXGBE_VMOLR_ROMPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE); + + /* clear the affected bit */ + vmolr &= ~IXGBE_VMOLR_MPE; + + if (dev->flags & IFF_ALLMULTI) { + vmolr |= IXGBE_VMOLR_MPE; + } else { + vmolr |= IXGBE_VMOLR_ROMPE; + hw->mac.ops.update_mc_addr_list(hw, dev); + } + ixgbe_write_uc_addr_list(adapter->netdev, pool); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); +} + +static void ixgbe_fwd_psrtype(struct ixgbe_fwd_adapter *vadapter) +{ + struct ixgbe_adapter *adapter = vadapter->real_adapter; + int rss_i = adapter->num_rx_queues_per_pool; + struct ixgbe_hw *hw = &adapter->hw; + u16 pool = vadapter->pool; + u32 psrtype = IXGBE_PSRTYPE_TCPHDR | + IXGBE_PSRTYPE_UDPHDR | + IXGBE_PSRTYPE_IPV4HDR | + IXGBE_PSRTYPE_L2HDR | + IXGBE_PSRTYPE_IPV6HDR; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + if (rss_i > 3) + psrtype |= 2 << 29; + else if (rss_i > 1) + psrtype |= 1 << 29; + + IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(VMDQ_P(pool)), psrtype); +} + +/** + * ixgbe_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from + **/ +static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + unsigned long size; + u16 i; + + /* ring already cleared, nothing to do */ + if (!rx_ring->rx_buffer_info) + return; + + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct ixgbe_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[i]; + if (rx_buffer->skb) { + struct sk_buff *skb = rx_buffer->skb; + if (IXGBE_CB(skb)->page_released) { + dma_unmap_page(dev, + IXGBE_CB(skb)->dma, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } + dev_kfree_skb(skb); + } + rx_buffer->skb = NULL; + if (rx_buffer->dma) + dma_unmap_page(dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + if (rx_buffer->page) + __free_pages(rx_buffer->page, + ixgbe_rx_pg_order(rx_ring)); + rx_buffer->page = NULL; + } + + size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; + memset(rx_ring->rx_buffer_info, 0, size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter, + struct ixgbe_ring *rx_ring) +{ + struct ixgbe_adapter *adapter = vadapter->real_adapter; + int index = rx_ring->queue_index + vadapter->rx_base_queue; + + /* shutdown specific queue receive and wait for dma to settle */ + ixgbe_disable_rx_queue(adapter, rx_ring); + usleep_range(10000, 20000); + ixgbe_irq_disable_queues(adapter, ((u64)1 << index)); + ixgbe_clean_rx_ring(rx_ring); + rx_ring->l2_accel_priv = NULL; +} + +static int ixgbe_fwd_ring_down(struct net_device *vdev, + struct ixgbe_fwd_adapter *accel) +{ + struct ixgbe_adapter *adapter = accel->real_adapter; + unsigned int rxbase = accel->rx_base_queue; + unsigned int txbase = accel->tx_base_queue; + int i; + + netif_tx_stop_all_queues(vdev); + + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { + ixgbe_disable_fwd_ring(accel, adapter->rx_ring[rxbase + i]); + adapter->rx_ring[rxbase + i]->netdev = adapter->netdev; + } + + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { + adapter->tx_ring[txbase + i]->l2_accel_priv = NULL; + adapter->tx_ring[txbase + i]->netdev = adapter->netdev; + } + + + return 0; +} + +static int ixgbe_fwd_ring_up(struct net_device *vdev, + struct ixgbe_fwd_adapter *accel) +{ + struct ixgbe_adapter *adapter = accel->real_adapter; + unsigned int rxbase, txbase, queues; + int i, baseq, err = 0; + + if (!test_bit(accel->pool, &adapter->fwd_bitmask)) + return 0; + + baseq = accel->pool * adapter->num_rx_queues_per_pool; + netdev_dbg(vdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", + accel->pool, adapter->num_rx_pools, + baseq, baseq + adapter->num_rx_queues_per_pool, + adapter->fwd_bitmask); + + accel->netdev = vdev; + accel->rx_base_queue = rxbase = baseq; + accel->tx_base_queue = txbase = baseq; + + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) + ixgbe_disable_fwd_ring(accel, adapter->rx_ring[rxbase + i]); + + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { + adapter->rx_ring[rxbase + i]->netdev = vdev; + adapter->rx_ring[rxbase + i]->l2_accel_priv = accel; + ixgbe_configure_rx_ring(adapter, adapter->rx_ring[rxbase + i]); + } + + for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { + adapter->tx_ring[txbase + i]->netdev = vdev; + adapter->tx_ring[txbase + i]->l2_accel_priv = accel; + } + + queues = min_t(unsigned int, + adapter->num_rx_queues_per_pool, vdev->num_tx_queues); + err = netif_set_real_num_tx_queues(vdev, queues); + if (err) + goto fwd_queue_err; + + err = netif_set_real_num_rx_queues(vdev, queues); + if (err) + goto fwd_queue_err; + + if (is_valid_ether_addr(vdev->dev_addr)) + ixgbe_add_mac_filter(adapter, vdev->dev_addr, accel->pool); + + ixgbe_fwd_psrtype(accel); + ixgbe_macvlan_set_rx_mode(vdev, accel->pool, adapter); + return err; +fwd_queue_err: + ixgbe_fwd_ring_down(vdev, accel); + return err; +} + +static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) +{ + struct net_device *upper; + struct list_head *iter; + int err; + + netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { + if (netif_is_macvlan(upper)) { + struct macvlan_dev *dfwd = netdev_priv(upper); + struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv; + + if (dfwd->fwd_priv) { + err = ixgbe_fwd_ring_up(upper, vadapter); + if (err) + continue; + } + } + } +} + static void ixgbe_configure(struct ixgbe_adapter *adapter) { + struct ixgbe_hw *hw = &adapter->hw; + ixgbe_configure_pb(adapter); #ifdef CONFIG_IXGBE_DCB ixgbe_configure_dcb(adapter); #endif + /* + * We must restore virtualization before VLANs or else + * the VLVF registers will not be populated + */ + ixgbe_configure_virtualization(adapter); ixgbe_set_rx_mode(adapter->netdev); ixgbe_restore_vlan(adapter); -#ifdef IXGBE_FCOE - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) - ixgbe_configure_fcoe(adapter); + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + hw->mac.ops.disable_rx_buff(hw); + break; + default: + break; + } -#endif /* IXGBE_FCOE */ if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { ixgbe_init_fdir_signature_82599(&adapter->hw, adapter->fdir_pballoc); @@ -3576,10 +4622,23 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_fdir_filter_restore(adapter); } - ixgbe_configure_virtualization(adapter); + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + hw->mac.ops.enable_rx_buff(hw); + break; + default: + break; + } +#ifdef IXGBE_FCOE + /* configure FCoE L2 filters, redirection table, and Rx control */ + ixgbe_configure_fcoe(adapter); + +#endif /* IXGBE_FCOE */ ixgbe_configure_tx(adapter); ixgbe_configure_rx(adapter); + ixgbe_configure_dfwd(adapter); } static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) @@ -3593,6 +4652,12 @@ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) case ixgbe_phy_sfp_passive_unknown: case ixgbe_phy_sfp_active_unknown: case ixgbe_phy_sfp_ftl_active: + case ixgbe_phy_qsfp_passive_unknown: + case ixgbe_phy_qsfp_active_unknown: + case ixgbe_phy_qsfp_intel: + case ixgbe_phy_qsfp_unknown: + /* ixgbe_phy_none is set when no SFP module is present */ + case ixgbe_phy_none: return true; case ixgbe_phy_nl: if (hw->mac.type == ixgbe_mac_82598EB) @@ -3628,25 +4693,25 @@ static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter) **/ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { - u32 autoneg; - bool negotiation, link_up = false; + u32 speed; + bool autoneg, link_up = false; u32 ret = IXGBE_ERR_LINK_SETUP; if (hw->mac.ops.check_link) - ret = hw->mac.ops.check_link(hw, &autoneg, &link_up, false); + ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); if (ret) goto link_cfg_out; - autoneg = hw->phy.autoneg_advertised; - if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) - ret = hw->mac.ops.get_link_capabilities(hw, &autoneg, - &negotiation); + speed = hw->phy.autoneg_advertised; + if ((!speed) && (hw->mac.ops.get_link_capabilities)) + ret = hw->mac.ops.get_link_capabilities(hw, &speed, + &autoneg); if (ret) goto link_cfg_out; if (hw->mac.ops.setup_link) - ret = hw->mac.ops.setup_link(hw, autoneg, negotiation, link_up); + ret = hw->mac.ops.setup_link(hw, speed, link_up); link_cfg_out: return ret; } @@ -3686,7 +4751,18 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter) if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { gpie &= ~IXGBE_GPIE_VTMODE_MASK; - gpie |= IXGBE_GPIE_VTMODE_64; + + switch (adapter->ring_feature[RING_F_VMDQ].mask) { + case IXGBE_82599_VMDQ_8Q_MASK: + gpie |= IXGBE_GPIE_VTMODE_16; + break; + case IXGBE_82599_VMDQ_4Q_MASK: + gpie |= IXGBE_GPIE_VTMODE_32; + break; + default: + gpie |= IXGBE_GPIE_VTMODE_64; + break; + } } /* Enable Thermal over heat sensor interrupt */ @@ -3729,13 +4805,11 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) else ixgbe_configure_msi_and_legacy(adapter); - /* enable the optics for both mult-speed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.enable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* enable the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.enable_tx_laser) hw->mac.ops.enable_tx_laser(hw); + smp_mb__before_atomic(); clear_bit(__IXGBE_DOWN, &adapter->state); ixgbe_napi_enable_all(adapter); @@ -3761,9 +4835,6 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) e_crit(drv, "Fan has stopped, replace the adapter\n"); } - /* enable transmits */ - netif_tx_start_all_queues(adapter->netdev); - /* bring the link up in the watchdog, this could race with our first * link up interrupt but shouldn't be a problem */ adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -3808,8 +4879,12 @@ void ixgbe_up(struct ixgbe_adapter *adapter) void ixgbe_reset(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; int err; + u8 old_addr[ETH_ALEN]; + if (ixgbe_removed(hw->hw_addr)) + return; /* lock SFP init bit to prevent race conditions with the watchdog */ while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) usleep_range(1000, 2000); @@ -3842,74 +4917,17 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); + /* do not flush user set addresses */ + memcpy(old_addr, &adapter->mac_table[0].addr, netdev->addr_len); + ixgbe_flush_sw_mac_table(adapter); + ixgbe_mac_set_default_filter(adapter, old_addr); - /* reprogram the RAR[0] in case user changed it. */ - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, adapter->num_vfs, - IXGBE_RAH_AV); -} - -/** - * ixgbe_clean_rx_ring - Free Rx Buffers per Queue - * @rx_ring: ring to free buffers from - **/ -static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) -{ - struct device *dev = rx_ring->dev; - unsigned long size; - u16 i; + /* update SAN MAC vmdq pool selection */ + if (hw->mac.san_mac_rar_index) + hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0)); - /* ring already cleared, nothing to do */ - if (!rx_ring->rx_buffer_info) - return; - - /* Free all the Rx ring sk_buffs */ - for (i = 0; i < rx_ring->count; i++) { - struct ixgbe_rx_buffer *rx_buffer_info; - - rx_buffer_info = &rx_ring->rx_buffer_info[i]; - if (rx_buffer_info->dma) { - dma_unmap_single(rx_ring->dev, rx_buffer_info->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - rx_buffer_info->dma = 0; - } - if (rx_buffer_info->skb) { - struct sk_buff *skb = rx_buffer_info->skb; - rx_buffer_info->skb = NULL; - do { - struct sk_buff *this = skb; - if (IXGBE_RSC_CB(this)->delay_unmap) { - dma_unmap_single(dev, - IXGBE_RSC_CB(this)->dma, - rx_ring->rx_buf_len, - DMA_FROM_DEVICE); - IXGBE_RSC_CB(this)->dma = 0; - IXGBE_RSC_CB(skb)->delay_unmap = false; - } - skb = skb->prev; - dev_kfree_skb(this); - } while (skb); - } - if (!rx_buffer_info->page) - continue; - if (rx_buffer_info->page_dma) { - dma_unmap_page(dev, rx_buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - rx_buffer_info->page_dma = 0; - } - put_page(rx_buffer_info->page); - rx_buffer_info->page = NULL; - rx_buffer_info->page_offset = 0; - } - - size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; - memset(rx_ring->rx_buffer_info, 0, size); - - /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); - - rx_ring->next_to_clean = 0; - rx_ring->next_to_use = 0; + if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) + ixgbe_ptp_reset(adapter); } /** @@ -3932,6 +4950,8 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring) ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); } + netdev_tx_reset_queue(txring_txq(tx_ring)); + size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; memset(tx_ring->tx_buffer_info, 0, size); @@ -3968,12 +4988,12 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter) static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter) { - struct hlist_node *node, *node2; + struct hlist_node *node2; struct ixgbe_fdir_filter *filter; spin_lock(&adapter->fdir_perfect_lock); - hlist_for_each_entry_safe(filter, node, node2, + hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { hlist_del(&filter->fdir_node); kfree(filter); @@ -3987,11 +5007,14 @@ void ixgbe_down(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; + struct net_device *upper; + struct list_head *iter; u32 rxctrl; int i; /* signal that we are down to the interrupt handler */ - set_bit(__IXGBE_DOWN, &adapter->state); + if (test_and_set_bit(__IXGBE_DOWN, &adapter->state)) + return; /* do nothing if already down */ /* disable receives */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); @@ -4010,6 +5033,19 @@ void ixgbe_down(struct ixgbe_adapter *adapter) netif_carrier_off(netdev); netif_tx_disable(netdev); + /* disable any upper devices */ + netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) { + netif_tx_stop_all_queues(upper); + netif_carrier_off(upper); + netif_tx_disable(upper); + } + } + } + ixgbe_irq_disable(adapter); ixgbe_napi_disable_all(adapter); @@ -4056,11 +5092,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) if (!pci_channel_offline(adapter->pdev)) ixgbe_reset(adapter); - /* power down the optics for multispeed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.disable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* power down the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); ixgbe_clean_all_tx_rings(adapter); @@ -4073,55 +5106,6 @@ void ixgbe_down(struct ixgbe_adapter *adapter) } /** - * ixgbe_poll - NAPI Rx polling callback - * @napi: structure for representing this polling device - * @budget: how many packets driver is allowed to clean - * - * This function is used for legacy and MSI, NAPI mode - **/ -static int ixgbe_poll(struct napi_struct *napi, int budget) -{ - struct ixgbe_q_vector *q_vector = - container_of(napi, struct ixgbe_q_vector, napi); - struct ixgbe_adapter *adapter = q_vector->adapter; - struct ixgbe_ring *ring; - int per_ring_budget; - bool clean_complete = true; - -#ifdef CONFIG_IXGBE_DCA - if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) - ixgbe_update_dca(q_vector); -#endif - - for (ring = q_vector->tx.ring; ring != NULL; ring = ring->next) - clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring); - - /* attempt to distribute budget to each queue fairly, but don't allow - * the budget to go below 1 because we'll exit polling */ - if (q_vector->rx.count > 1) - per_ring_budget = max(budget/q_vector->rx.count, 1); - else - per_ring_budget = budget; - - for (ring = q_vector->rx.ring; ring != NULL; ring = ring->next) - clean_complete &= ixgbe_clean_rx_irq(q_vector, ring, - per_ring_budget); - - /* If all work not completed, return budget and keep polling */ - if (!clean_complete) - return budget; - - /* all work done, exit the polling mode */ - napi_complete(napi); - if (adapter->rx_itr_setting & 1) - ixgbe_set_itr(q_vector); - if (!test_bit(__IXGBE_DOWN, &adapter->state)) - ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx)); - - return 0; -} - -/** * ixgbe_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure **/ @@ -4134,802 +5118,6 @@ static void ixgbe_tx_timeout(struct net_device *netdev) } /** - * ixgbe_set_rss_queues: Allocate queues for RSS - * @adapter: board private structure to initialize - * - * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try - * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. - * - **/ -static inline bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) -{ - bool ret = false; - struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_RSS]; - - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { - f->mask = 0xF; - adapter->num_rx_queues = f->indices; - adapter->num_tx_queues = f->indices; - ret = true; - } else { - ret = false; - } - - return ret; -} - -/** - * ixgbe_set_fdir_queues: Allocate queues for Flow Director - * @adapter: board private structure to initialize - * - * Flow Director is an advanced Rx filter, attempting to get Rx flows back - * to the original CPU that initiated the Tx session. This runs in addition - * to RSS, so if a packet doesn't match an FDIR filter, we can still spread the - * Rx load across CPUs using RSS. - * - **/ -static inline bool ixgbe_set_fdir_queues(struct ixgbe_adapter *adapter) -{ - bool ret = false; - struct ixgbe_ring_feature *f_fdir = &adapter->ring_feature[RING_F_FDIR]; - - f_fdir->indices = min((int)num_online_cpus(), f_fdir->indices); - f_fdir->mask = 0; - - /* Flow Director must have RSS enabled */ - if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && - (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) { - adapter->num_tx_queues = f_fdir->indices; - adapter->num_rx_queues = f_fdir->indices; - ret = true; - } else { - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - } - return ret; -} - -#ifdef IXGBE_FCOE -/** - * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE) - * @adapter: board private structure to initialize - * - * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges. - * The ring feature mask is not used as a mask for FCoE, as it can take any 8 - * rx queues out of the max number of rx queues, instead, it is used as the - * index of the first rx queue used by FCoE. - * - **/ -static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter) -{ - struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE]; - - if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) - return false; - - f->indices = min((int)num_online_cpus(), f->indices); - - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { - e_info(probe, "FCoE enabled with RSS\n"); - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) - ixgbe_set_fdir_queues(adapter); - else - ixgbe_set_rss_queues(adapter); - } - - /* adding FCoE rx rings to the end */ - f->mask = adapter->num_rx_queues; - adapter->num_rx_queues += f->indices; - adapter->num_tx_queues += f->indices; - - return true; -} -#endif /* IXGBE_FCOE */ - -/* Artificial max queue cap per traffic class in DCB mode */ -#define DCB_QUEUE_CAP 8 - -#ifdef CONFIG_IXGBE_DCB -static inline bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) -{ - int per_tc_q, q, i, offset = 0; - struct net_device *dev = adapter->netdev; - int tcs = netdev_get_num_tc(dev); - - if (!tcs) - return false; - - /* Map queue offset and counts onto allocated tx queues */ - per_tc_q = min(dev->num_tx_queues / tcs, (unsigned int)DCB_QUEUE_CAP); - q = min((int)num_online_cpus(), per_tc_q); - - for (i = 0; i < tcs; i++) { - netdev_set_tc_queue(dev, i, q, offset); - offset += q; - } - - adapter->num_tx_queues = q * tcs; - adapter->num_rx_queues = q * tcs; - -#ifdef IXGBE_FCOE - /* FCoE enabled queues require special configuration indexed - * by feature specific indices and mask. Here we map FCoE - * indices onto the DCB queue pairs allowing FCoE to own - * configuration later. - */ - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { - int tc; - struct ixgbe_ring_feature *f = - &adapter->ring_feature[RING_F_FCOE]; - - tc = netdev_get_prio_tc_map(dev, adapter->fcoe.up); - f->indices = dev->tc_to_txq[tc].count; - f->mask = dev->tc_to_txq[tc].offset; - } -#endif - - return true; -} -#endif - -/** - * ixgbe_set_sriov_queues: Allocate queues for IOV use - * @adapter: board private structure to initialize - * - * IOV doesn't actually use anything, so just NAK the - * request for now and let the other queue routines - * figure out what to do. - */ -static inline bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) -{ - return false; -} - -/* - * ixgbe_set_num_queues: Allocate queues for device, feature dependent - * @adapter: board private structure to initialize - * - * This is the top level queue allocation routine. The order here is very - * important, starting with the "most" number of features turned on at once, - * and ending with the smallest set of features. This way large combinations - * can be allocated if they're turned on, and smaller combinations are the - * fallthrough conditions. - * - **/ -static int ixgbe_set_num_queues(struct ixgbe_adapter *adapter) -{ - /* Start with base case */ - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - adapter->num_rx_pools = adapter->num_rx_queues; - adapter->num_rx_queues_per_pool = 1; - - if (ixgbe_set_sriov_queues(adapter)) - goto done; - -#ifdef CONFIG_IXGBE_DCB - if (ixgbe_set_dcb_queues(adapter)) - goto done; - -#endif -#ifdef IXGBE_FCOE - if (ixgbe_set_fcoe_queues(adapter)) - goto done; - -#endif /* IXGBE_FCOE */ - if (ixgbe_set_fdir_queues(adapter)) - goto done; - - if (ixgbe_set_rss_queues(adapter)) - goto done; - - /* fallback to base case */ - adapter->num_rx_queues = 1; - adapter->num_tx_queues = 1; - -done: - if ((adapter->netdev->reg_state == NETREG_UNREGISTERED) || - (adapter->netdev->reg_state == NETREG_UNREGISTERING)) - return 0; - - /* Notify the stack of the (possibly) reduced queue counts. */ - netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); - return netif_set_real_num_rx_queues(adapter->netdev, - adapter->num_rx_queues); -} - -static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter, - int vectors) -{ - int err, vector_threshold; - - /* We'll want at least 3 (vector_threshold): - * 1) TxQ[0] Cleanup - * 2) RxQ[0] Cleanup - * 3) Other (Link Status Change, etc.) - * 4) TCP Timer (optional) - */ - vector_threshold = MIN_MSIX_COUNT; - - /* The more we get, the more we will assign to Tx/Rx Cleanup - * for the separate queues...where Rx Cleanup >= Tx Cleanup. - * Right now, we simply care about how many we'll get; we'll - * set them up later while requesting irq's. - */ - while (vectors >= vector_threshold) { - err = pci_enable_msix(adapter->pdev, adapter->msix_entries, - vectors); - if (!err) /* Success in acquiring all requested vectors. */ - break; - else if (err < 0) - vectors = 0; /* Nasty failure, quit now */ - else /* err == number of vectors we should try again with */ - vectors = err; - } - - if (vectors < vector_threshold) { - /* Can't allocate enough MSI-X interrupts? Oh well. - * This just means we'll go with either a single MSI - * vector or fall back to legacy interrupts. - */ - netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev, - "Unable to allocate MSI-X interrupts\n"); - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else { - adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; /* Woot! */ - /* - * Adjust for only the vectors we'll use, which is minimum - * of max_msix_q_vectors + NON_Q_VECTORS, or the number of - * vectors we were allocated. - */ - adapter->num_msix_vectors = min(vectors, - adapter->max_msix_q_vectors + NON_Q_VECTORS); - } -} - -/** - * ixgbe_cache_ring_rss - Descriptor ring to register mapping for RSS - * @adapter: board private structure to initialize - * - * Cache the descriptor ring offsets for RSS to the assigned rings. - * - **/ -static inline bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter) -{ - int i; - - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) - return false; - - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->reg_idx = i; - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i]->reg_idx = i; - - return true; -} - -#ifdef CONFIG_IXGBE_DCB - -/* ixgbe_get_first_reg_idx - Return first register index associated with ring */ -static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc, - unsigned int *tx, unsigned int *rx) -{ - struct net_device *dev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; - u8 num_tcs = netdev_get_num_tc(dev); - - *tx = 0; - *rx = 0; - - switch (hw->mac.type) { - case ixgbe_mac_82598EB: - *tx = tc << 2; - *rx = tc << 3; - break; - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - if (num_tcs > 4) { - if (tc < 3) { - *tx = tc << 5; - *rx = tc << 4; - } else if (tc < 5) { - *tx = ((tc + 2) << 4); - *rx = tc << 4; - } else if (tc < num_tcs) { - *tx = ((tc + 8) << 3); - *rx = tc << 4; - } - } else { - *rx = tc << 5; - switch (tc) { - case 0: - *tx = 0; - break; - case 1: - *tx = 64; - break; - case 2: - *tx = 96; - break; - case 3: - *tx = 112; - break; - default: - break; - } - } - break; - default: - break; - } -} - -/** - * ixgbe_cache_ring_dcb - Descriptor ring to register mapping for DCB - * @adapter: board private structure to initialize - * - * Cache the descriptor ring offsets for DCB to the assigned rings. - * - **/ -static inline bool ixgbe_cache_ring_dcb(struct ixgbe_adapter *adapter) -{ - struct net_device *dev = adapter->netdev; - int i, j, k; - u8 num_tcs = netdev_get_num_tc(dev); - - if (!num_tcs) - return false; - - for (i = 0, k = 0; i < num_tcs; i++) { - unsigned int tx_s, rx_s; - u16 count = dev->tc_to_txq[i].count; - - ixgbe_get_first_reg_idx(adapter, i, &tx_s, &rx_s); - for (j = 0; j < count; j++, k++) { - adapter->tx_ring[k]->reg_idx = tx_s + j; - adapter->rx_ring[k]->reg_idx = rx_s + j; - adapter->tx_ring[k]->dcb_tc = i; - adapter->rx_ring[k]->dcb_tc = i; - } - } - - return true; -} -#endif - -/** - * ixgbe_cache_ring_fdir - Descriptor ring to register mapping for Flow Director - * @adapter: board private structure to initialize - * - * Cache the descriptor ring offsets for Flow Director to the assigned rings. - * - **/ -static inline bool ixgbe_cache_ring_fdir(struct ixgbe_adapter *adapter) -{ - int i; - bool ret = false; - - if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && - (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE)) { - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i]->reg_idx = i; - for (i = 0; i < adapter->num_tx_queues; i++) - adapter->tx_ring[i]->reg_idx = i; - ret = true; - } - - return ret; -} - -#ifdef IXGBE_FCOE -/** - * ixgbe_cache_ring_fcoe - Descriptor ring to register mapping for the FCoE - * @adapter: board private structure to initialize - * - * Cache the descriptor ring offsets for FCoE mode to the assigned rings. - * - */ -static inline bool ixgbe_cache_ring_fcoe(struct ixgbe_adapter *adapter) -{ - struct ixgbe_ring_feature *f = &adapter->ring_feature[RING_F_FCOE]; - int i; - u8 fcoe_rx_i = 0, fcoe_tx_i = 0; - - if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) - return false; - - if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) - ixgbe_cache_ring_fdir(adapter); - else - ixgbe_cache_ring_rss(adapter); - - fcoe_rx_i = f->mask; - fcoe_tx_i = f->mask; - } - for (i = 0; i < f->indices; i++, fcoe_rx_i++, fcoe_tx_i++) { - adapter->rx_ring[f->mask + i]->reg_idx = fcoe_rx_i; - adapter->tx_ring[f->mask + i]->reg_idx = fcoe_tx_i; - } - return true; -} - -#endif /* IXGBE_FCOE */ -/** - * ixgbe_cache_ring_sriov - Descriptor ring to register mapping for sriov - * @adapter: board private structure to initialize - * - * SR-IOV doesn't use any descriptor rings but changes the default if - * no other mapping is used. - * - */ -static inline bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter) -{ - adapter->rx_ring[0]->reg_idx = adapter->num_vfs * 2; - adapter->tx_ring[0]->reg_idx = adapter->num_vfs * 2; - if (adapter->num_vfs) - return true; - else - return false; -} - -/** - * ixgbe_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize - * - * Once we know the feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. - * - * Note, the order the various feature calls is important. It must start with - * the "most" features enabled at the same time, then trickle down to the - * least amount of features turned on at once. - **/ -static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) -{ - /* start with default case */ - adapter->rx_ring[0]->reg_idx = 0; - adapter->tx_ring[0]->reg_idx = 0; - - if (ixgbe_cache_ring_sriov(adapter)) - return; - -#ifdef CONFIG_IXGBE_DCB - if (ixgbe_cache_ring_dcb(adapter)) - return; -#endif - -#ifdef IXGBE_FCOE - if (ixgbe_cache_ring_fcoe(adapter)) - return; -#endif /* IXGBE_FCOE */ - - if (ixgbe_cache_ring_fdir(adapter)) - return; - - if (ixgbe_cache_ring_rss(adapter)) - return; -} - -/** - * ixgbe_alloc_queues - Allocate memory for all rings - * @adapter: board private structure to initialize - * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. The polling_netdev array is - * intended for Multiqueue, but should work fine with a single queue. - **/ -static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter) -{ - int rx = 0, tx = 0, nid = adapter->node; - - if (nid < 0 || !node_online(nid)) - nid = first_online_node; - - for (; tx < adapter->num_tx_queues; tx++) { - struct ixgbe_ring *ring; - - ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, nid); - if (!ring) - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; - ring->count = adapter->tx_ring_count; - ring->queue_index = tx; - ring->numa_node = nid; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - adapter->tx_ring[tx] = ring; - } - - for (; rx < adapter->num_rx_queues; rx++) { - struct ixgbe_ring *ring; - - ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, nid); - if (!ring) - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; - ring->count = adapter->rx_ring_count; - ring->queue_index = rx; - ring->numa_node = nid; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - - adapter->rx_ring[rx] = ring; - } - - ixgbe_cache_ring_register(adapter); - - return 0; - -err_allocation: - while (tx) - kfree(adapter->tx_ring[--tx]); - - while (rx) - kfree(adapter->rx_ring[--rx]); - return -ENOMEM; -} - -/** - * ixgbe_set_interrupt_capability - set MSI-X or MSI if supported - * @adapter: board private structure to initialize - * - * Attempt to configure the interrupts using the best available - * capabilities of the hardware and the kernel. - **/ -static int ixgbe_set_interrupt_capability(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - int err = 0; - int vector, v_budget; - - /* - * It's easy to be greedy for MSI-X vectors, but it really - * doesn't do us much good if we have a lot more vectors - * than CPU's. So let's be conservative and only ask for - * (roughly) the same number of vectors as there are CPU's. - */ - v_budget = min(adapter->num_rx_queues + adapter->num_tx_queues, - (int)num_online_cpus()) + NON_Q_VECTORS; - - /* - * At the same time, hardware can only support a maximum of - * hw.mac->max_msix_vectors vectors. With features - * such as RSS and VMDq, we can easily surpass the number of Rx and Tx - * descriptor queues supported by our device. Thus, we cap it off in - * those rare cases where the cpu count also exceeds our vector limit. - */ - v_budget = min(v_budget, (int)hw->mac.max_msix_vectors); - - /* A failure in MSI-X entry allocation isn't fatal, but it does - * mean we disable MSI-X capabilities of the adapter. */ - adapter->msix_entries = kcalloc(v_budget, - sizeof(struct msix_entry), GFP_KERNEL); - if (adapter->msix_entries) { - for (vector = 0; vector < v_budget; vector++) - adapter->msix_entries[vector].entry = vector; - - ixgbe_acquire_msix_vectors(adapter, v_budget); - - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - goto out; - } - - adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; - adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { - e_err(probe, - "ATR is not supported while multiple " - "queues are disabled. Disabling Flow Director\n"); - } - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->atr_sample_rate = 0; - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - ixgbe_disable_sriov(adapter); - - err = ixgbe_set_num_queues(adapter); - if (err) - return err; - - err = pci_enable_msi(adapter->pdev); - if (!err) { - adapter->flags |= IXGBE_FLAG_MSI_ENABLED; - } else { - netif_printk(adapter, hw, KERN_DEBUG, adapter->netdev, - "Unable to allocate MSI interrupt, " - "falling back to legacy. Error: %d\n", err); - /* reset err */ - err = 0; - } - -out: - return err; -} - -/** - * ixgbe_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize - * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. - **/ -static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter) -{ - int v_idx, num_q_vectors; - struct ixgbe_q_vector *q_vector; - - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - else - num_q_vectors = 1; - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - q_vector = kzalloc_node(sizeof(struct ixgbe_q_vector), - GFP_KERNEL, adapter->node); - if (!q_vector) - q_vector = kzalloc(sizeof(struct ixgbe_q_vector), - GFP_KERNEL); - if (!q_vector) - goto err_out; - - q_vector->adapter = adapter; - q_vector->v_idx = v_idx; - - /* Allocate the affinity_hint cpumask, configure the mask */ - if (!alloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL)) - goto err_out; - cpumask_set_cpu(v_idx, q_vector->affinity_mask); - netif_napi_add(adapter->netdev, &q_vector->napi, - ixgbe_poll, 64); - adapter->q_vector[v_idx] = q_vector; - } - - return 0; - -err_out: - while (v_idx) { - v_idx--; - q_vector = adapter->q_vector[v_idx]; - netif_napi_del(&q_vector->napi); - free_cpumask_var(q_vector->affinity_mask); - kfree(q_vector); - adapter->q_vector[v_idx] = NULL; - } - return -ENOMEM; -} - -/** - * ixgbe_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize - * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. - **/ -static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter) -{ - int v_idx, num_q_vectors; - - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - else - num_q_vectors = 1; - - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[v_idx]; - adapter->q_vector[v_idx] = NULL; - netif_napi_del(&q_vector->napi); - free_cpumask_var(q_vector->affinity_mask); - kfree(q_vector); - } -} - -static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) -{ - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { - adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; - pci_disable_msi(adapter->pdev); - } -} - -/** - * ixgbe_init_interrupt_scheme - Determine proper interrupt scheme - * @adapter: board private structure to initialize - * - * We determine which interrupt scheme to use based on... - * - Kernel support (MSI, MSI-X) - * - which can be user-defined (via MODULE_PARAM) - * - Hardware queue count (num_*_queues) - * - defined by miscellaneous hardware support/features (RSS, etc.) - **/ -int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) -{ - int err; - - /* Number of supported queues */ - err = ixgbe_set_num_queues(adapter); - if (err) - return err; - - err = ixgbe_set_interrupt_capability(adapter); - if (err) { - e_dev_err("Unable to setup interrupt capabilities\n"); - goto err_set_interrupt; - } - - err = ixgbe_alloc_q_vectors(adapter); - if (err) { - e_dev_err("Unable to allocate memory for queue vectors\n"); - goto err_alloc_q_vectors; - } - - err = ixgbe_alloc_queues(adapter); - if (err) { - e_dev_err("Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - - e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", - (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", - adapter->num_rx_queues, adapter->num_tx_queues); - - set_bit(__IXGBE_DOWN, &adapter->state); - - return 0; - -err_alloc_queues: - ixgbe_free_q_vectors(adapter); -err_alloc_q_vectors: - ixgbe_reset_interrupt_capability(adapter); -err_set_interrupt: - return err; -} - -/** - * ixgbe_clear_interrupt_scheme - Clear the current interrupt scheme settings - * @adapter: board private structure to clear interrupt scheme on - * - * We go through and clear interrupt specific resources and reset the structure - * to pre-load conditions - **/ -void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) -{ - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; - - /* ixgbe_get_stats64() might access this ring, we must wait - * a grace period before freeing it. - */ - kfree_rcu(ring, rcu); - adapter->rx_ring[i] = NULL; - } - - adapter->num_tx_queues = 0; - adapter->num_rx_queues = 0; - - ixgbe_free_q_vectors(adapter); - ixgbe_reset_interrupt_capability(adapter); -} - -/** * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter) * @adapter: board private structure to initialize * @@ -4937,11 +5125,12 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter) * Fields are initialized based on PCI device information and * OS network device settings (MTU size). **/ -static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) +static int ixgbe_sw_init(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - unsigned int rss; + unsigned int rss, fdir; + u32 fwsm; #ifdef CONFIG_IXGBE_DCB int j; struct tc_configuration *tc; @@ -4955,44 +5144,71 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) hw->subsystem_vendor_id = pdev->subsystem_vendor; hw->subsystem_device_id = pdev->subsystem_device; - /* Set capability flags */ - rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus()); - adapter->ring_feature[RING_F_RSS].indices = rss; - adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + /* Set common capability flags and settings */ + rss = min_t(int, IXGBE_MAX_RSS_INDICES, num_online_cpus()); + adapter->ring_feature[RING_F_RSS].limit = rss; + adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + adapter->max_q_vectors = MAX_Q_VECTORS_82599; + adapter->atr_sample_rate = 20; + fdir = min_t(int, IXGBE_MAX_FDIR_INDICES, num_online_cpus()); + adapter->ring_feature[RING_F_FDIR].limit = fdir; + adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K; +#ifdef CONFIG_IXGBE_DCA + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif +#ifdef IXGBE_FCOE + adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; +#ifdef CONFIG_IXGBE_DCB + /* Default traffic class to use for FCoE */ + adapter->fcoe.up = IXGBE_FCOE_DEFTC; +#endif /* CONFIG_IXGBE_DCB */ +#endif /* IXGBE_FCOE */ + + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * + hw->mac.num_rar_entries, + GFP_ATOMIC); + + /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { case ixgbe_mac_82598EB: + adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + if (hw->device_id == IXGBE_DEV_ID_82598AT) adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; - adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; - break; - case ixgbe_mac_X540: - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; - case ixgbe_mac_82599EB: - adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; - adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; - adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; - if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; - /* Flow Director hash filters enabled */ - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->atr_sample_rate = 20; - adapter->ring_feature[RING_F_FDIR].indices = - IXGBE_MAX_FDIR_INDICES; - adapter->fdir_pballoc = IXGBE_FDIR_PBALLOC_64K; + + adapter->max_q_vectors = MAX_Q_VECTORS_82598; + adapter->ring_feature[RING_F_FDIR].limit = 0; + adapter->atr_sample_rate = 0; + adapter->fdir_pballoc = 0; #ifdef IXGBE_FCOE - adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; - adapter->ring_feature[RING_F_FCOE].indices = 0; #ifdef CONFIG_IXGBE_DCB - /* Default traffic class to use for FCoE */ - adapter->fcoe.up = IXGBE_FCOE_DEFTC; -#endif + adapter->fcoe.up = 0; +#endif /* IXGBE_DCB */ #endif /* IXGBE_FCOE */ break; + case ixgbe_mac_82599EB: + if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + break; + case ixgbe_mac_X540: + fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); + if (fwsm & IXGBE_FWSM_TS_ENABLED) + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + break; default: break; } +#ifdef IXGBE_FCOE + /* FCoE support exists, always init the FCoE lock */ + spin_lock_init(&adapter->fcoe.lock); + +#endif /* n-tuple support exists, always init our spinlock */ spin_lock_init(&adapter->fdir_perfect_lock); @@ -5028,30 +5244,38 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->dcb_cfg.pfc_mode_enable = false; adapter->dcb_set_bitmap = 0x00; adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE; - ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg, - MAX_TRAFFIC_CLASS); + memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, + sizeof(adapter->temp_dcb_cfg)); #endif /* default flow control settings */ hw->fc.requested_mode = ixgbe_fc_full; hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */ -#ifdef CONFIG_DCB - adapter->last_lfc_mode = hw->fc.current_mode; -#endif ixgbe_pbthresh_setup(adapter); hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; hw->fc.send_xon = true; - hw->fc.disable_fc_autoneg = false; + hw->fc.disable_fc_autoneg = ixgbe_device_supports_autoneg_fc(hw); + +#ifdef CONFIG_PCI_IOV + if (max_vfs > 0) + e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated - please use the pci sysfs interface instead.\n"); + + /* assign number of SR-IOV VFs */ + if (hw->mac.type != ixgbe_mac_82598EB) { + if (max_vfs > IXGBE_MAX_VFS_DRV_LIMIT) { + adapter->num_vfs = 0; + e_dev_warn("max_vfs parameter out of range. Not assigning any SR-IOV VFs\n"); + } else { + adapter->num_vfs = max_vfs; + } + } +#endif /* CONFIG_PCI_IOV */ /* enable itr by default in dynamic mode */ adapter->rx_itr_setting = 1; adapter->tx_itr_setting = 1; - /* set defaults for eitr in MegaBytes */ - adapter->eitr_low = 10; - adapter->eitr_high = 20; - /* set default ring sizes */ adapter->tx_ring_count = IXGBE_DEFAULT_TXD; adapter->rx_ring_count = IXGBE_DEFAULT_RXD; @@ -5065,12 +5289,8 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) return -EIO; } - /* enable rx csum by default */ - adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED; - - /* get assigned NUMA node */ - adapter->node = dev_to_node(&pdev->dev); - + /* PF holds first pool slot */ + set_bit(0, &adapter->fwd_bitmask); set_bit(__IXGBE_DOWN, &adapter->state); return 0; @@ -5085,21 +5305,36 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) { struct device *dev = tx_ring->dev; + int orig_node = dev_to_node(dev); + int numa_node = -1; int size; size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); + + if (tx_ring->q_vector) + numa_node = tx_ring->q_vector->numa_node; + + tx_ring->tx_buffer_info = vzalloc_node(size, numa_node); if (!tx_ring->tx_buffer_info) tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) goto err; + u64_stats_init(&tx_ring->syncp); + /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); + set_dev_node(dev, numa_node); + tx_ring->desc = dma_alloc_coherent(dev, + tx_ring->size, + &tx_ring->dma, + GFP_KERNEL); + set_dev_node(dev, orig_node); + if (!tx_ring->desc) + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -5132,10 +5367,16 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) err = ixgbe_setup_tx_resources(adapter->tx_ring[i]); if (!err) continue; + e_err(probe, "Allocation for Tx Queue %u failed\n", i); - break; + goto err_setup_tx; } + return 0; +err_setup_tx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbe_free_tx_resources(adapter->tx_ring[i]); return err; } @@ -5148,22 +5389,36 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) { struct device *dev = rx_ring->dev; + int orig_node = dev_to_node(dev); + int numa_node = -1; int size; size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); + + if (rx_ring->q_vector) + numa_node = rx_ring->q_vector->numa_node; + + rx_ring->rx_buffer_info = vzalloc_node(size, numa_node); if (!rx_ring->rx_buffer_info) rx_ring->rx_buffer_info = vzalloc(size); if (!rx_ring->rx_buffer_info) goto err; + u64_stats_init(&rx_ring->syncp); + /* Round up to nearest 4K */ rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - + set_dev_node(dev, numa_node); + rx_ring->desc = dma_alloc_coherent(dev, + rx_ring->size, + &rx_ring->dma, + GFP_KERNEL); + set_dev_node(dev, orig_node); + if (!rx_ring->desc) + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; @@ -5196,10 +5451,20 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) err = ixgbe_setup_rx_resources(adapter->rx_ring[i]); if (!err) continue; + e_err(probe, "Allocation for Rx Queue %u failed\n", i); - break; + goto err_setup_rx; } +#ifdef IXGBE_FCOE + err = ixgbe_setup_fcoe_ddp_resources(adapter); + if (!err) +#endif + return 0; +err_setup_rx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbe_free_rx_resources(adapter->rx_ring[i]); return err; } @@ -5274,6 +5539,10 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) { int i; +#ifdef IXGBE_FCOE + ixgbe_free_fcoe_ddp_resources(adapter); + +#endif for (i = 0; i < adapter->num_rx_queues; i++) if (adapter->rx_ring[i]->desc) ixgbe_free_rx_resources(adapter->rx_ring[i]); @@ -5289,20 +5558,24 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; /* MTU < 68 is an error and causes problems on some kernels */ - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED && - hw->mac.type != ixgbe_mac_X540) { - if ((new_mtu < 68) || (max_frame > MAXIMUM_ETHERNET_VLAN_SIZE)) - return -EINVAL; - } else { - if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) - return -EINVAL; - } + if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE)) + return -EINVAL; + + /* + * For 82599EB we cannot allow legacy VFs to enable their receive + * paths when MTU greater than 1500 is configured. So display a + * warning that legacy VFs will be disabled. + */ + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && + (adapter->hw.mac.type == ixgbe_mac_82599EB) && + (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN))) + e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n"); e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); + /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; @@ -5327,7 +5600,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) static int ixgbe_open(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - int err; + int err, queues; /* disallow open during test */ if (test_bit(__IXGBE_TESTING, &adapter->state)) @@ -5351,20 +5624,54 @@ static int ixgbe_open(struct net_device *netdev) if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + if (adapter->num_rx_pools > 1) + queues = adapter->num_rx_queues_per_pool; + else + queues = adapter->num_tx_queues; + + err = netif_set_real_num_tx_queues(netdev, queues); + if (err) + goto err_set_queues; + + if (adapter->num_rx_pools > 1 && + adapter->num_rx_queues > IXGBE_MAX_L2A_QUEUES) + queues = IXGBE_MAX_L2A_QUEUES; + else + queues = adapter->num_rx_queues; + err = netif_set_real_num_rx_queues(netdev, queues); + if (err) + goto err_set_queues; + + ixgbe_ptp_init(adapter); + ixgbe_up_complete(adapter); return 0; +err_set_queues: + ixgbe_free_irq(adapter); err_req_irq: -err_setup_rx: ixgbe_free_all_rx_resources(adapter); -err_setup_tx: +err_setup_rx: ixgbe_free_all_tx_resources(adapter); +err_setup_tx: ixgbe_reset(adapter); return err; } +static void ixgbe_close_suspend(struct ixgbe_adapter *adapter) +{ + ixgbe_ptp_suspend(adapter); + + ixgbe_down(adapter); + ixgbe_free_irq(adapter); + + ixgbe_free_all_tx_resources(adapter); + ixgbe_free_all_rx_resources(adapter); +} + /** * ixgbe_close - Disables a network interface * @netdev: network interface device structure @@ -5380,13 +5687,11 @@ static int ixgbe_close(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - ixgbe_down(adapter); - ixgbe_free_irq(adapter); + ixgbe_ptp_stop(adapter); - ixgbe_fdir_filter_exit(adapter); + ixgbe_close_suspend(adapter); - ixgbe_free_all_tx_resources(adapter); - ixgbe_free_all_rx_resources(adapter); + ixgbe_fdir_filter_exit(adapter); ixgbe_release_hw_control(adapter); @@ -5400,6 +5705,7 @@ static int ixgbe_resume(struct pci_dev *pdev) struct net_device *netdev = adapter->netdev; u32 err; + adapter->hw.hw_addr = adapter->io_addr; pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); /* @@ -5413,25 +5719,25 @@ static int ixgbe_resume(struct pci_dev *pdev) e_dev_err("Cannot enable PCI device from suspend\n"); return err; } + smp_mb__before_atomic(); + clear_bit(__IXGBE_DISABLED, &adapter->state); pci_set_master(pdev); pci_wake_from_d3(pdev, false); - err = ixgbe_init_interrupt_scheme(adapter); - if (err) { - e_dev_err("Cannot initialize interrupts for device\n"); - return err; - } - ixgbe_reset(adapter); IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); - if (netif_running(netdev)) { + rtnl_lock(); + err = ixgbe_init_interrupt_scheme(adapter); + if (!err && netif_running(netdev)) err = ixgbe_open(netdev); - if (err) - return err; - } + + rtnl_unlock(); + + if (err) + return err; netif_device_attach(netdev); @@ -5452,18 +5758,12 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); - if (netif_running(netdev)) { - ixgbe_down(adapter); - ixgbe_free_irq(adapter); - ixgbe_free_all_tx_resources(adapter); - ixgbe_free_all_rx_resources(adapter); - } + rtnl_lock(); + if (netif_running(netdev)) + ixgbe_close_suspend(adapter); + rtnl_unlock(); ixgbe_clear_interrupt_scheme(adapter); -#ifdef CONFIG_DCB - kfree(adapter->ixgbe_ieee_pfc); - kfree(adapter->ixgbe_ieee_ets); -#endif #ifdef CONFIG_PM retval = pci_save_state(pdev); @@ -5471,9 +5771,16 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) return retval; #endif + if (hw->mac.ops.stop_link_on_d3) + hw->mac.ops.stop_link_on_d3(hw); + if (wufc) { ixgbe_set_rx_mode(netdev); + /* enable the optics for 82599 SFP+ fiber as we can WoL */ + if (hw->mac.ops.enable_tx_laser) + hw->mac.ops.enable_tx_laser(hw); + /* turn on all-multi mode if wake on multicast is enabled */ if (wufc & IXGBE_WUFC_MC) { fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); @@ -5507,7 +5814,8 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) ixgbe_release_hw_control(adapter); - pci_disable_device(pdev); + if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + pci_disable_device(pdev); return 0; } @@ -5558,12 +5866,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; - u64 bytes = 0, packets = 0; -#ifdef IXGBE_FCOE - struct ixgbe_fcoe *fcoe = &adapter->fcoe; - unsigned int cpu; - u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0; -#endif /* IXGBE_FCOE */ + u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; if (test_bit(__IXGBE_DOWN, &adapter->state) || test_bit(__IXGBE_RESETTING, &adapter->state)) @@ -5572,9 +5875,6 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { u64 rsc_count = 0; u64 rsc_flush = 0; - for (i = 0; i < 16; i++) - adapter->hw_rx_no_dma_resources += - IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); for (i = 0; i < adapter->num_rx_queues; i++) { rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count; rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush; @@ -5588,12 +5888,14 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) non_eop_descs += rx_ring->rx_stats.non_eop_descs; alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; + hw_csum_rx_error += rx_ring->rx_stats.csum_err; bytes += rx_ring->stats.bytes; packets += rx_ring->stats.packets; } adapter->non_eop_descs = non_eop_descs; adapter->alloc_rx_page_failed = alloc_rx_page_failed; adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; + adapter->hw_csum_rx_error = hw_csum_rx_error; netdev->stats.rx_bytes = bytes; netdev->stats.rx_packets = packets; @@ -5675,6 +5977,9 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC); hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC); case ixgbe_mac_82599EB: + for (i = 0; i < 16; i++) + adapter->hw_rx_no_dma_resources += + IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL); IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */ hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL); @@ -5692,17 +5997,19 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); /* Add up per cpu counters for total ddp aloc fail */ - if (fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) { + if (adapter->fcoe.ddp_pool) { + struct ixgbe_fcoe *fcoe = &adapter->fcoe; + struct ixgbe_fcoe_ddp_pool *ddp_pool; + unsigned int cpu; + u64 noddp = 0, noddp_ext_buff = 0; for_each_possible_cpu(cpu) { - fcoe_noddp_counts_sum += - *per_cpu_ptr(fcoe->pcpu_noddp, cpu); - fcoe_noddp_ext_buff_counts_sum += - *per_cpu_ptr(fcoe-> - pcpu_noddp_ext_buff, cpu); + ddp_pool = per_cpu_ptr(fcoe->ddp_pool, cpu); + noddp += ddp_pool->noddp; + noddp_ext_buff += ddp_pool->noddp_ext_buff; } + hwstats->fcoe_noddp = noddp; + hwstats->fcoe_noddp_ext_buff = noddp_ext_buff; } - hwstats->fcoe_noddp = fcoe_noddp_counts_sum; - hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum; #endif /* IXGBE_FCOE */ break; default: @@ -5760,7 +6067,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) /** * ixgbe_fdir_reinit_subtask - worker thread to reinit FDIR filter table - * @adapter - pointer to the device adapter structure + * @adapter: pointer to the device adapter structure **/ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter) { @@ -5785,7 +6092,7 @@ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter) if (ixgbe_reinit_fdir_tables_82599(hw) == 0) { for (i = 0; i < adapter->num_tx_queues; i++) set_bit(__IXGBE_TX_FDIR_INIT_DONE, - &(adapter->tx_ring[i]->state)); + &(adapter->tx_ring[i]->state)); /* re-enable flow director interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR); } else { @@ -5796,7 +6103,7 @@ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter) /** * ixgbe_check_hang_subtask - check for hung queues and dropped interrupts - * @adapter - pointer to the device adapter structure + * @adapter: pointer to the device adapter structure * * This function serves two purposes. First it strobes the interrupt lines * in order to make certain interrupts are occurring. Secondly it sets the @@ -5809,8 +6116,9 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) u64 eics = 0; int i; - /* If we're down or resetting, just bail */ + /* If we're down, removing or resetting, just bail */ if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_REMOVING, &adapter->state) || test_bit(__IXGBE_RESETTING, &adapter->state)) return; @@ -5830,7 +6138,7 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) (IXGBE_EICS_TCP_TIMER | IXGBE_EICS_OTHER)); } else { /* get one bit for every active tx/rx interrupt vector */ - for (i = 0; i < adapter->num_msix_vectors - NON_Q_VECTORS; i++) { + for (i = 0; i < adapter->num_q_vectors; i++) { struct ixgbe_q_vector *qv = adapter->q_vector[i]; if (qv->rx.ring || qv->tx.ring) eics |= ((u64)1 << i); @@ -5844,15 +6152,15 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter) /** * ixgbe_watchdog_update_link - update the link status - * @adapter - pointer to the device adapter structure - * @link_speed - pointer to a u32 to store the link_speed + * @adapter: pointer to the device adapter structure + * @link_speed: pointer to a u32 to store the link_speed **/ static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 link_speed = adapter->link_speed; bool link_up = adapter->link_up; - int i; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) return; @@ -5864,13 +6172,13 @@ static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) link_speed = IXGBE_LINK_SPEED_10GB_FULL; link_up = true; } - if (link_up) { - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { - for (i = 0; i < MAX_TRAFFIC_CLASS; i++) - hw->mac.ops.fc_enable(hw, i); - } else { - hw->mac.ops.fc_enable(hw, 0); - } + + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + + if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) { + hw->mac.ops.fc_enable(hw); + ixgbe_set_rx_drop_en(adapter); } if (link_up || @@ -5885,15 +6193,34 @@ static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) adapter->link_speed = link_speed; } +static void ixgbe_update_default_up(struct ixgbe_adapter *adapter) +{ +#ifdef CONFIG_IXGBE_DCB + struct net_device *netdev = adapter->netdev; + struct dcb_app app = { + .selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE, + .protocol = 0, + }; + u8 up = 0; + + if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) + up = dcb_ieee_getapp_mask(netdev, &app); + + adapter->default_up = (up > 1) ? (ffs(up) - 1) : 0; +#endif +} + /** * ixgbe_watchdog_link_is_up - update netif_carrier status and * print link up message - * @adapter - pointer to the device adapter structure + * @adapter: pointer to the device adapter structure **/ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; + struct net_device *upper; + struct list_head *iter; u32 link_speed = adapter->link_speed; bool flow_rx, flow_tx; @@ -5924,6 +6251,12 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) flow_rx = false; break; } + + adapter->last_rx_ptp_check = jiffies; + + if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) + ixgbe_ptp_start_cyclecounter(adapter); + e_info(drv, "NIC Link is Up %s, Flow Control: %s\n", (link_speed == IXGBE_LINK_SPEED_10GB_FULL ? "10 Gbps" : @@ -5938,14 +6271,35 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) netif_carrier_on(netdev); ixgbe_check_vf_rate_limit(adapter); + + /* enable transmits */ + netif_tx_wake_all_queues(adapter->netdev); + + /* enable any upper devices */ + rtnl_lock(); + netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) { + if (netif_is_macvlan(upper)) { + struct macvlan_dev *vlan = netdev_priv(upper); + + if (vlan->fwd_priv) + netif_tx_wake_all_queues(upper); + } + } + rtnl_unlock(); + + /* update the default user priority for VFs */ + ixgbe_update_default_up(adapter); + + /* ping all the active vfs to let them know link has changed */ + ixgbe_ping_all_vfs(adapter); } /** * ixgbe_watchdog_link_is_down - update netif_carrier status and * print link down message - * @adapter - pointer to the adapter structure + * @adapter: pointer to the adapter structure **/ -static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter* adapter) +static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct ixgbe_hw *hw = &adapter->hw; @@ -5961,13 +6315,19 @@ static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter* adapter) if (ixgbe_is_sfp(hw) && hw->mac.type == ixgbe_mac_82598EB) adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP; + if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) + ixgbe_ptp_start_cyclecounter(adapter); + e_info(drv, "NIC Link is Down\n"); netif_carrier_off(netdev); + + /* ping all the active vfs to let them know link has changed */ + ixgbe_ping_all_vfs(adapter); } /** * ixgbe_watchdog_flush_tx - flush queues on link down - * @adapter - pointer to the device adapter structure + * @adapter: pointer to the device adapter structure **/ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) { @@ -5989,6 +6349,7 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) * to get done, so reset controller to flush Tx. * (Do the reset outside of interrupt context). */ + e_warn(drv, "initiating reset to clear Tx work after link loss\n"); adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; } } @@ -5998,8 +6359,9 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) { u32 ssvpc; - /* Do not perform spoof check for 82598 */ - if (adapter->hw.mac.type == ixgbe_mac_82598EB) + /* Do not perform spoof check for 82598 or if not in IOV mode */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB || + adapter->num_vfs == 0) return; ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC); @@ -6011,17 +6373,18 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) if (!ssvpc) return; - e_warn(drv, "%d Spoofed packets detected\n", ssvpc); + e_warn(drv, "%u Spoofed packets detected\n", ssvpc); } /** * ixgbe_watchdog_subtask - check and bring link up - * @adapter - pointer to the device adapter structure + * @adapter: pointer to the device adapter structure **/ static void ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter) { - /* if interface is down do nothing */ + /* if interface is down, removing or resetting, do nothing */ if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_REMOVING, &adapter->state) || test_bit(__IXGBE_RESETTING, &adapter->state)) return; @@ -6040,7 +6403,7 @@ static void ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter) /** * ixgbe_sfp_detection_subtask - poll for SFP+ cable - * @adapter - the ixgbe adapter structure + * @adapter: the ixgbe adapter structure **/ static void ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter) { @@ -6107,13 +6470,13 @@ sfp_out: /** * ixgbe_sfp_link_config_subtask - set up link SFP after module install - * @adapter - the ixgbe adapter structure + * @adapter: the ixgbe adapter structure **/ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; - u32 autoneg; - bool negotiation; + u32 speed; + bool autoneg = false; if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_CONFIG)) return; @@ -6124,11 +6487,19 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter) adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; - autoneg = hw->phy.autoneg_advertised; - if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) - hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiation); + speed = hw->phy.autoneg_advertised; + if ((!speed) && (hw->mac.ops.get_link_capabilities)) { + hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + + /* setup the highest link when no autoneg */ + if (!autoneg) { + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + speed = IXGBE_LINK_SPEED_10GB_FULL; + } + } + if (hw->mac.ops.setup_link) - hw->mac.ops.setup_link(hw, autoneg, negotiation, true); + hw->mac.ops.setup_link(hw, speed, true); adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; adapter->link_check_timeout = jiffies; @@ -6190,41 +6561,32 @@ static void ixgbe_service_timer(unsigned long data) unsigned long next_event_offset; bool ready = true; -#ifdef CONFIG_PCI_IOV - ready = false; + /* poll faster when waiting for link */ + if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE) + next_event_offset = HZ / 10; + else + next_event_offset = HZ * 2; +#ifdef CONFIG_PCI_IOV /* * don't bother with SR-IOV VF DMA hang check if there are * no VFs or the link is down */ if (!adapter->num_vfs || - (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) { - ready = true; + (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) goto normal_timer_service; - } /* If we have VFs allocated then we must check for DMA hangs */ ixgbe_check_for_bad_vf(adapter); next_event_offset = HZ / 50; adapter->timer_event_accumulator++; - if (adapter->timer_event_accumulator >= 100) { - ready = true; + if (adapter->timer_event_accumulator >= 100) adapter->timer_event_accumulator = 0; - } - - goto schedule_event; - -normal_timer_service: -#endif - /* poll faster when waiting for link */ - if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE) - next_event_offset = HZ / 10; else - next_event_offset = HZ * 2; + ready = false; -#ifdef CONFIG_PCI_IOV -schedule_event: +normal_timer_service: #endif /* Reset the timer */ mod_timer(&adapter->service_timer, next_event_offset + jiffies); @@ -6240,8 +6602,9 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) adapter->flags2 &= ~IXGBE_FLAG2_RESET_REQUESTED; - /* If we're already down or resetting, just bail */ + /* If we're already down, removing or resetting, just bail */ if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_REMOVING, &adapter->state) || test_bit(__IXGBE_RESETTING, &adapter->state)) return; @@ -6249,7 +6612,9 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) netdev_err(adapter->netdev, "Reset adapter\n"); adapter->tx_timeout_count++; + rtnl_lock(); ixgbe_reinit_locked(adapter); + rtnl_unlock(); } /** @@ -6261,7 +6626,15 @@ static void ixgbe_service_task(struct work_struct *work) struct ixgbe_adapter *adapter = container_of(work, struct ixgbe_adapter, service_task); - + if (ixgbe_removed(adapter->hw.hw_addr)) { + if (!test_bit(__IXGBE_DOWN, &adapter->state)) { + rtnl_lock(); + ixgbe_down(adapter); + rtnl_unlock(); + } + ixgbe_service_event_complete(adapter); + return; + } ixgbe_reset_subtask(adapter); ixgbe_sfp_detection_subtask(adapter); ixgbe_sfp_link_config_subtask(adapter); @@ -6270,49 +6643,37 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_fdir_reinit_subtask(adapter); ixgbe_check_hang_subtask(adapter); - ixgbe_service_event_complete(adapter); -} - -void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, - u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) -{ - struct ixgbe_adv_tx_context_desc *context_desc; - u16 i = tx_ring->next_to_use; - - context_desc = IXGBE_TX_CTXTDESC_ADV(tx_ring, i); - - i++; - tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; - - /* set bits to identify this as an advanced context descriptor */ - type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) { + ixgbe_ptp_overflow_check(adapter); + ixgbe_ptp_rx_hang(adapter); + } - context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); - context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); - context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); - context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); + ixgbe_service_event_complete(adapter); } -static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol, u8 *hdr_len) +static int ixgbe_tso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + u8 *hdr_len) { - int err; + struct sk_buff *skb = first->skb; u32 vlan_macip_lens, type_tucmd; u32 mss_l4len_idx, l4len; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; if (!skb_is_gso(skb)) return 0; - if (skb_header_cloned(skb)) { - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } + err = skb_cow_head(skb, 0); + if (err < 0) + return err; /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = IXGBE_ADVTXD_TUCMD_L4T_TCP; - if (protocol == __constant_htons(ETH_P_IP)) { + if (first->protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; @@ -6321,54 +6682,63 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring, struct sk_buff *skb, IPPROTO_TCP, 0); type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; + first->tx_flags |= IXGBE_TX_FLAGS_TSO | + IXGBE_TX_FLAGS_CSUM | + IXGBE_TX_FLAGS_IPV4; } else if (skb_is_gso_v6(skb)) { ipv6_hdr(skb)->payload_len = 0; tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + first->tx_flags |= IXGBE_TX_FLAGS_TSO | + IXGBE_TX_FLAGS_CSUM; } + /* compute header lengths */ l4len = tcp_hdrlen(skb); *hdr_len = skb_transport_offset(skb) + l4len; - /* mss_l4len_id: use 1 as index for TSO */ + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* mss_l4len_id: use 0 as index for TSO */ mss_l4len_idx = l4len << IXGBE_ADVTXD_L4LEN_SHIFT; mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT; - mss_l4len_idx |= 1 << IXGBE_ADVTXD_IDX_SHIFT; /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ vlan_macip_lens = skb_network_header_len(skb); vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; + vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, - mss_l4len_idx); + mss_l4len_idx); return 1; } -static bool ixgbe_tx_csum(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, - __be16 protocol) +static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first) { + struct sk_buff *skb = first->skb; u32 vlan_macip_lens = 0; u32 mss_l4len_idx = 0; u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && - !(tx_flags & IXGBE_TX_FLAGS_TXSW)) - return false; + if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && + !(first->tx_flags & IXGBE_TX_FLAGS_CC)) + return; } else { u8 l4_hdr = 0; - switch (protocol) { - case __constant_htons(ETH_P_IP): + switch (first->protocol) { + case htons(ETH_P_IP): vlan_macip_lens |= skb_network_header_len(skb); type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4; l4_hdr = ip_hdr(skb)->protocol; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; break; @@ -6376,7 +6746,7 @@ static bool ixgbe_tx_csum(struct ixgbe_ring *tx_ring, if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, "partial checksum but proto=%x!\n", - skb->protocol); + first->protocol); } break; } @@ -6400,237 +6770,223 @@ static bool ixgbe_tx_csum(struct ixgbe_ring *tx_ring, if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, "partial checksum but l4 proto=%x!\n", - skb->protocol); + l4_hdr); } break; } + + /* update TX checksum flag */ + first->tx_flags |= IXGBE_TX_FLAGS_CSUM; } + /* vlan_macip_lens: MACLEN, VLAN tag */ vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT; - vlan_macip_lens |= tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; + vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK; ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, mss_l4len_idx); - - return (skb->ip_summed == CHECKSUM_PARTIAL); } -static __le32 ixgbe_tx_cmd_type(u32 tx_flags) +#define IXGBE_SET_FLAG(_input, _flag, _result) \ + ((_flag <= _result) ? \ + ((u32)(_input & _flag) * (_result / _flag)) : \ + ((u32)(_input & _flag) / (_flag / _result))) + +static u32 ixgbe_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ - __le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | - IXGBE_ADVTXD_DCMD_DEXT); + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS; /* set HW vlan bit if vlan is present */ - if (tx_flags & IXGBE_TX_FLAGS_HW_VLAN) - cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE); + cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_HW_VLAN, + IXGBE_ADVTXD_DCMD_VLE); /* set segmentation enable bits for TSO/FSO */ -#ifdef IXGBE_FCOE - if ((tx_flags & IXGBE_TX_FLAGS_TSO) || (tx_flags & IXGBE_TX_FLAGS_FSO)) -#else - if (tx_flags & IXGBE_TX_FLAGS_TSO) -#endif - cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE); + cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSO, + IXGBE_ADVTXD_DCMD_TSE); + + /* set timestamp bit if present */ + cmd_type |= IXGBE_SET_FLAG(tx_flags, IXGBE_TX_FLAGS_TSTAMP, + IXGBE_ADVTXD_MAC_TSTAMP); + + /* insert frame checksum */ + cmd_type ^= IXGBE_SET_FLAG(skb->no_fcs, 1, IXGBE_ADVTXD_DCMD_IFCS); return cmd_type; } -static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen) +static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) { - __le32 olinfo_status = - cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT); - - if (tx_flags & IXGBE_TX_FLAGS_TSO) { - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); - /* enble IPv4 checksum for TSO */ - if (tx_flags & IXGBE_TX_FLAGS_IPV4) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM); - } + u32 olinfo_status = paylen << IXGBE_ADVTXD_PAYLEN_SHIFT; /* enable L4 checksum for TSO and TX checksum offload */ - if (tx_flags & IXGBE_TX_FLAGS_CSUM) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM); + olinfo_status |= IXGBE_SET_FLAG(tx_flags, + IXGBE_TX_FLAGS_CSUM, + IXGBE_ADVTXD_POPTS_TXSM); -#ifdef IXGBE_FCOE - /* use index 1 context for FCOE/FSO */ - if (tx_flags & IXGBE_TX_FLAGS_FCOE) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC | - (1 << IXGBE_ADVTXD_IDX_SHIFT)); + /* enble IPv4 checksum for TSO */ + olinfo_status |= IXGBE_SET_FLAG(tx_flags, + IXGBE_TX_FLAGS_IPV4, + IXGBE_ADVTXD_POPTS_IXSM); -#endif /* * Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running */ - if (tx_flags & IXGBE_TX_FLAGS_TXSW) - olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC); + olinfo_status |= IXGBE_SET_FLAG(tx_flags, + IXGBE_TX_FLAGS_CC, + IXGBE_ADVTXD_CC); - return olinfo_status; + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ IXGBE_TXD_CMD_RS) static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, - struct sk_buff *skb, struct ixgbe_tx_buffer *first, - u32 tx_flags, const u8 hdr_len) { - struct device *dev = tx_ring->dev; - struct ixgbe_tx_buffer *tx_buffer_info; + struct sk_buff *skb = first->skb; + struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - dma_addr_t dma; - __le32 cmd_type, olinfo_status; struct skb_frag_struct *frag; - unsigned int f = 0; - unsigned int data_len = skb->data_len; - unsigned int size = skb_headlen(skb); - u32 offset = 0; - u32 paylen = skb->len - hdr_len; + dma_addr_t dma; + unsigned int data_len, size; + u32 tx_flags = first->tx_flags; + u32 cmd_type = ixgbe_tx_cmd_type(skb, tx_flags); u16 i = tx_ring->next_to_use; - u16 gso_segs; + + tx_desc = IXGBE_TX_DESC(tx_ring, i); + + ixgbe_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; #ifdef IXGBE_FCOE if (tx_flags & IXGBE_TX_FLAGS_FCOE) { - if (data_len >= sizeof(struct fcoe_crc_eof)) { - data_len -= sizeof(struct fcoe_crc_eof); - } else { + if (data_len < sizeof(struct fcoe_crc_eof)) { size -= sizeof(struct fcoe_crc_eof) - data_len; data_len = 0; + } else { + data_len -= sizeof(struct fcoe_crc_eof); } } #endif - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - cmd_type = ixgbe_tx_cmd_type(tx_flags); - olinfo_status = ixgbe_tx_olinfo_status(tx_flags, paylen); + tx_buffer = first; - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i); + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; - for (;;) { - while (size > IXGBE_MAX_DATA_PER_TXD) { - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); - tx_desc->read.cmd_type_len = - cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD); - tx_desc->read.olinfo_status = olinfo_status; + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); - offset += IXGBE_MAX_DATA_PER_TXD; - size -= IXGBE_MAX_DATA_PER_TXD; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type ^ IXGBE_MAX_DATA_PER_TXD); - tx_desc++; i++; + tx_desc++; if (i == tx_ring->count) { - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0); + tx_desc = IXGBE_TX_DESC(tx_ring, 0); i = 0; } - } + tx_desc->read.olinfo_status = 0; - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = offset + size; - tx_buffer_info->tx_flags = tx_flags; - tx_buffer_info->dma = dma; + dma += IXGBE_MAX_DATA_PER_TXD; + size -= IXGBE_MAX_DATA_PER_TXD; - tx_desc->read.buffer_addr = cpu_to_le64(dma + offset); - tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); - tx_desc->read.olinfo_status = olinfo_status; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } - if (!data_len) + if (likely(!data_len)) break; - frag = &skb_shinfo(skb)->frags[f]; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IXGBE_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + #ifdef IXGBE_FCOE size = min_t(unsigned int, data_len, skb_frag_size(frag)); #else size = skb_frag_size(frag); #endif data_len -= size; - f++; - offset = 0; - tx_flags |= IXGBE_TX_FLAGS_MAPPED_AS_PAGE; + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; - - tx_desc++; - i++; - if (i == tx_ring->count) { - tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0); - i = 0; - } + tx_buffer = &tx_ring->tx_buffer_info[i]; } - tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + /* write last descriptor with RS and EOP bits */ + cmd_type |= size | IXGBE_TXD_CMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - i++; - if (i == tx_ring->count) - i = 0; - - tx_ring->next_to_use = i; - - if (tx_flags & IXGBE_TX_FLAGS_TSO) - gso_segs = skb_shinfo(skb)->gso_segs; -#ifdef IXGBE_FCOE - /* adjust for FCoE Sequence Offload */ - else if (tx_flags & IXGBE_TX_FLAGS_FSO) - gso_segs = DIV_ROUND_UP(skb->len - hdr_len, - skb_shinfo(skb)->gso_size); -#endif /* IXGBE_FCOE */ - else - gso_segs = 1; - - /* multiply data chunks by size of headers */ - tx_buffer_info->bytecount = paylen + (gso_segs * hdr_len); - tx_buffer_info->gso_segs = gso_segs; - tx_buffer_info->skb = skb; + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); /* set the timestamp */ first->time_stamp = jiffies; /* - * Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. */ wmb(); /* set next_to_watch value indicating a packet is present */ first->next_to_watch = tx_desc; + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + /* notify HW of packet */ - writel(i, tx_ring->tail); + ixgbe_write_tail(tx_ring, i); return; dma_error: - dev_err(dev, "TX DMA map failed\n"); + dev_err(tx_ring->dev, "TX DMA map failed\n"); /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; i--; } - dev_kfree_skb_any(skb); - tx_ring->next_to_use = i; } -static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) +static void ixgbe_atr(struct ixgbe_ring *ring, + struct ixgbe_tx_buffer *first) { struct ixgbe_q_vector *q_vector = ring->q_vector; union ixgbe_atr_hash_dword input = { .dword = 0 }; @@ -6654,16 +7010,16 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, ring->atr_count++; /* snag network header to get L4 type and address */ - hdr.network = skb_network_header(skb); + hdr.network = skb_network_header(first->skb); /* Currently only IPv4/IPv6 with TCP is supported */ - if ((protocol != __constant_htons(ETH_P_IPV6) || + if ((first->protocol != htons(ETH_P_IPV6) || hdr.ipv6->nexthdr != IPPROTO_TCP) && - (protocol != __constant_htons(ETH_P_IP) || + (first->protocol != htons(ETH_P_IP) || hdr.ipv4->protocol != IPPROTO_TCP)) return; - th = tcp_hdr(skb); + th = tcp_hdr(first->skb); /* skip this packet since it is invalid or the socket is closing */ if (!th || th->fin) @@ -6676,7 +7032,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, /* reset sample count */ ring->atr_count = 0; - vlan_id = htons(tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT); + vlan_id = htons(first->tx_flags >> IXGBE_TX_FLAGS_VLAN_SHIFT); /* * src and dst are inverted, think how the receiver sees them @@ -6691,13 +7047,13 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb, * since src port and flex bytes occupy the same word XOR them together * and write the value to source port portion of compressed dword */ - if (tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN)) - common.port.src ^= th->dest ^ __constant_htons(ETH_P_8021Q); + if (first->tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN)) + common.port.src ^= th->dest ^ htons(ETH_P_8021Q); else - common.port.src ^= th->dest ^ protocol; + common.port.src ^= th->dest ^ first->protocol; common.port.dst ^= th->source; - if (protocol == __constant_htons(ETH_P_IP)) { + if (first->protocol == htons(ETH_P_IP)) { input.formatted.flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; common.ip ^= hdr.ipv4->saddr ^ hdr.ipv4->daddr; } else { @@ -6743,30 +7099,48 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size) return __ixgbe_maybe_stop_tx(tx_ring, size); } -static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb) +static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) { - struct ixgbe_adapter *adapter = netdev_priv(dev); - int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : - smp_processor_id(); + struct ixgbe_fwd_adapter *fwd_adapter = accel_priv; #ifdef IXGBE_FCOE - __be16 protocol = vlan_get_protocol(skb); - - if (((protocol == htons(ETH_P_FCOE)) || - (protocol == htons(ETH_P_FIP))) && - (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) { - txq &= (adapter->ring_feature[RING_F_FCOE].indices - 1); - txq += adapter->ring_feature[RING_F_FCOE].mask; - return txq; - } + struct ixgbe_adapter *adapter; + struct ixgbe_ring_feature *f; + int txq; #endif - if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { - while (unlikely(txq >= dev->real_num_tx_queues)) - txq -= dev->real_num_tx_queues; - return txq; + if (fwd_adapter) + return skb->queue_mapping + fwd_adapter->tx_base_queue; + +#ifdef IXGBE_FCOE + + /* + * only execute the code below if protocol is FCoE + * or FIP and we have FCoE enabled on the adapter + */ + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_FCOE): + case htons(ETH_P_FIP): + adapter = netdev_priv(dev); + + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) + break; + default: + return fallback(dev, skb); } - return skb_tx_hash(dev, skb); + f = &adapter->ring_feature[RING_F_FCOE]; + + txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : + smp_processor_id(); + + while (txq >= f->indices) + txq -= f->indices; + + return txq + f->offset; +#else + return fallback(dev, skb); +#endif } netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, @@ -6776,52 +7150,72 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, struct ixgbe_tx_buffer *first; int tso; u32 tx_flags = 0; -#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD unsigned short f; -#endif u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = skb->protocol; u8 hdr_len = 0; /* * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD, - * + 1 desc for skb_head_len/IXGBE_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, * + 1 desc for context descriptor, * otherwise try next time */ -#if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); -#else - count += skb_shinfo(skb)->nr_frags; -#endif + if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) { tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } -#ifdef CONFIG_PCI_IOV - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - tx_flags |= IXGBE_TX_FLAGS_TXSW; + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; -#endif /* if we have a HW VLAN tag being added default to the HW one */ if (vlan_tx_tag_present(skb)) { tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN check the next protocol and store the tag */ - } else if (protocol == __constant_htons(ETH_P_8021Q)) { + } else if (protocol == htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr, _vhdr; vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); if (!vhdr) goto out_drop; protocol = vhdr->h_vlan_encapsulated_proto; - tx_flags |= ntohs(vhdr->h_vlan_TCI) << IXGBE_TX_FLAGS_VLAN_SHIFT; + tx_flags |= ntohs(vhdr->h_vlan_TCI) << + IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_SW_VLAN; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, + &adapter->state))) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IXGBE_TX_FLAGS_TSTAMP; + + /* schedule check for Tx timestamp */ + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + schedule_work(&adapter->ptp_tx_work); + } + + skb_tx_timestamp(skb); + +#ifdef CONFIG_PCI_IOV + /* + * Use the l2switch_enable flag - would be false if the DMA + * Tx switch had been disabled. + */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + tx_flags |= IXGBE_TX_FLAGS_CC; + +#endif /* DCB maps skb priorities 0-7 onto 3 bit PCP of VLAN tag. */ if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && ((tx_flags & (IXGBE_TX_FLAGS_HW_VLAN | IXGBE_TX_FLAGS_SW_VLAN)) || @@ -6831,8 +7225,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT; if (tx_flags & IXGBE_TX_FLAGS_SW_VLAN) { struct vlan_ethhdr *vhdr; - if (skb_header_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + + if (skb_cow_head(skb, 0)) goto out_drop; vhdr = (struct vlan_ethhdr *)skb->data; vhdr->h_vlan_TCI = htons(tx_flags >> @@ -6842,65 +7236,77 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, } } - /* record the location of the first descriptor for this packet */ - first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = protocol; #ifdef IXGBE_FCOE /* setup tx offload for FCoE */ - if ((protocol == __constant_htons(ETH_P_FCOE)) && - (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) { - tso = ixgbe_fso(tx_ring, skb, tx_flags, &hdr_len); + if ((protocol == htons(ETH_P_FCOE)) && + (tx_ring->netdev->features & (NETIF_F_FSO | NETIF_F_FCOE_CRC))) { + tso = ixgbe_fso(tx_ring, first, &hdr_len); if (tso < 0) goto out_drop; - else if (tso) - tx_flags |= IXGBE_TX_FLAGS_FSO | - IXGBE_TX_FLAGS_FCOE; - else - tx_flags |= IXGBE_TX_FLAGS_FCOE; goto xmit_fcoe; } #endif /* IXGBE_FCOE */ - /* setup IPv4/IPv6 offloads */ - if (protocol == __constant_htons(ETH_P_IP)) - tx_flags |= IXGBE_TX_FLAGS_IPV4; - - tso = ixgbe_tso(tx_ring, skb, tx_flags, protocol, &hdr_len); + tso = ixgbe_tso(tx_ring, first, &hdr_len); if (tso < 0) goto out_drop; - else if (tso) - tx_flags |= IXGBE_TX_FLAGS_TSO; - else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol)) - tx_flags |= IXGBE_TX_FLAGS_CSUM; + else if (!tso) + ixgbe_tx_csum(tx_ring, first); /* add the ATR filter if ATR is on */ if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state)) - ixgbe_atr(tx_ring, skb, tx_flags, protocol); + ixgbe_atr(tx_ring, first); #ifdef IXGBE_FCOE xmit_fcoe: #endif /* IXGBE_FCOE */ - ixgbe_tx_map(tx_ring, skb, first, tx_flags, hdr_len); + ixgbe_tx_map(tx_ring, first, hdr_len); ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED); return NETDEV_TX_OK; out_drop: - dev_kfree_skb_any(skb); + dev_kfree_skb_any(first->skb); + first->skb = NULL; + return NETDEV_TX_OK; } -static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb, + struct net_device *netdev, + struct ixgbe_ring *ring) { struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_ring *tx_ring; - tx_ring = adapter->tx_ring[skb->queue_mapping]; + /* + * The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (unlikely(skb->len < 17)) { + if (skb_pad(skb, 17 - skb->len)) + return NETDEV_TX_OK; + skb->len = 17; + skb_set_tail_pointer(skb, 17); + } + + tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping]; + return ixgbe_xmit_frame_ring(skb, adapter, tx_ring); } +static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + return __ixgbe_xmit_frame(skb, netdev, NULL); +} + /** * ixgbe_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure @@ -6913,17 +7319,17 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; struct sockaddr *addr = p; + int ret; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + ixgbe_del_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); - hw->mac.ops.set_rar(hw, 0, hw->mac.addr, adapter->num_vfs, - IXGBE_RAH_AV); - - return 0; + ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, VMDQ_P(0)); + return ret > 0 ? 0 : ret; } static int @@ -6957,7 +7363,14 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd); + switch (cmd) { + case SIOCSHWTSTAMP: + return ixgbe_ptp_set_ts_config(adapter, req); + case SIOCGHWTSTAMP: + return ixgbe_ptp_get_ts_config(adapter, req); + default: + return mdio_mii_ioctl(&adapter->hw.phy.mdio, if_mii(req), cmd); + } } /** @@ -6971,12 +7384,15 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev) { int err = 0; struct ixgbe_adapter *adapter = netdev_priv(dev); - struct ixgbe_mac_info *mac = &adapter->hw.mac; + struct ixgbe_hw *hw = &adapter->hw; - if (is_valid_ether_addr(mac->san_addr)) { + if (is_valid_ether_addr(hw->mac.san_addr)) { rtnl_lock(); - err = dev_addr_add(dev, mac->san_addr, NETDEV_HW_ADDR_T_SAN); + err = dev_addr_add(dev, hw->mac.san_addr, NETDEV_HW_ADDR_T_SAN); rtnl_unlock(); + + /* update SAN MAC vmdq pool selection */ + hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0)); } return err; } @@ -7019,18 +7435,15 @@ static void ixgbe_netpoll(struct net_device *netdev) adapter->flags |= IXGBE_FLAG_IN_NETPOLL; if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - int num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - for (i = 0; i < num_q_vectors; i++) { - struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; - ixgbe_msix_clean_rings(0, q_vector); - } + for (i = 0; i < adapter->num_q_vectors; i++) + ixgbe_msix_clean_rings(0, adapter->q_vector[i]); } else { ixgbe_intr(adapter->pdev->irq, netdev); } adapter->flags &= ~IXGBE_FLAG_IN_NETPOLL; } -#endif +#endif static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { @@ -7045,10 +7458,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->rx_packets += packets; stats->rx_bytes += bytes; } @@ -7061,10 +7474,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, if (ring) { do { - start = u64_stats_fetch_begin_bh(&ring->syncp); + start = u64_stats_fetch_begin_irq(&ring->syncp); packets = ring->stats.packets; bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_bh(&ring->syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); stats->tx_packets += packets; stats->tx_bytes += bytes; } @@ -7079,8 +7492,10 @@ static struct rtnl_link_stats64 *ixgbe_get_stats64(struct net_device *netdev, return stats; } -/* ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid. - * #adapter: pointer to ixgbe_adapter +#ifdef CONFIG_IXGBE_DCB +/** + * ixgbe_validate_rtr - verify 802.1Qp to Rx packet buffer mapping is valid. + * @adapter: pointer to ixgbe_adapter * @tc: number of traffic classes currently enabled * * Configure a valid 802.1Qp to Rx packet buffer mapping ie confirm @@ -7115,9 +7530,34 @@ static void ixgbe_validate_rtr(struct ixgbe_adapter *adapter, u8 tc) return; } +/** + * ixgbe_set_prio_tc_map - Configure netdev prio tc map + * @adapter: Pointer to adapter struct + * + * Populate the netdev user priority to tc map + */ +static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter) +{ + struct net_device *dev = adapter->netdev; + struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg; + struct ieee_ets *ets = adapter->ixgbe_ieee_ets; + u8 prio; -/* ixgbe_setup_tc - routine to configure net_device for multiple traffic - * classes. + for (prio = 0; prio < MAX_USER_PRIORITY; prio++) { + u8 tc = 0; + + if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) + tc = ixgbe_dcb_get_tc_from_up(dcb_cfg, 0, prio); + else if (ets) + tc = ets->prio_tc[prio]; + + netdev_set_prio_tc_map(dev, prio, tc); + } +} + +#endif /* CONFIG_IXGBE_DCB */ +/** + * ixgbe_setup_tc - configure net_device for multiple traffic classes * * @netdev: net device to configure * @tc: number of traffic classes to enable @@ -7126,18 +7566,18 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; - - /* Multiple traffic classes requires multiple queues */ - if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { - e_err(drv, "Enable failed, needs MSI-X\n"); - return -EINVAL; - } + bool pools; /* Hardware supports up to 8 traffic classes */ if (tc > adapter->dcb_cfg.num_tcs.pg_tcs || - (hw->mac.type == ixgbe_mac_82598EB && tc < MAX_TRAFFIC_CLASS)) + (hw->mac.type == ixgbe_mac_82598EB && + tc < MAX_TRAFFIC_CLASS)) return -EINVAL; + pools = (find_first_zero_bit(&adapter->fwd_bitmask, 32) > 1); + if (tc && pools && adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS) + return -EBUSY; + /* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the * hardware is not flexible enough to do this dynamically. @@ -7146,35 +7586,51 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) ixgbe_close(dev); ixgbe_clear_interrupt_scheme(adapter); +#ifdef CONFIG_IXGBE_DCB if (tc) { netdev_set_num_tc(dev, tc); - adapter->last_lfc_mode = adapter->hw.fc.current_mode; + ixgbe_set_prio_tc_map(adapter); adapter->flags |= IXGBE_FLAG_DCB_ENABLED; - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - if (adapter->hw.mac.type == ixgbe_mac_82598EB) + if (adapter->hw.mac.type == ixgbe_mac_82598EB) { + adapter->last_lfc_mode = adapter->hw.fc.requested_mode; adapter->hw.fc.requested_mode = ixgbe_fc_none; + } } else { netdev_reset_tc(dev); - adapter->hw.fc.requested_mode = adapter->last_lfc_mode; + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + adapter->hw.fc.requested_mode = adapter->last_lfc_mode; adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->temp_dcb_cfg.pfc_mode_enable = false; adapter->dcb_cfg.pfc_mode_enable = false; } - ixgbe_init_interrupt_scheme(adapter); ixgbe_validate_rtr(adapter, tc); + +#endif /* CONFIG_IXGBE_DCB */ + ixgbe_init_interrupt_scheme(adapter); + if (netif_running(dev)) - ixgbe_open(dev); + return ixgbe_open(dev); return 0; } +#ifdef CONFIG_PCI_IOV +void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); + ixgbe_setup_tc(netdev, netdev_get_num_tc(netdev)); + rtnl_unlock(); +} + +#endif void ixgbe_do_reset(struct net_device *netdev) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -7186,59 +7642,42 @@ void ixgbe_do_reset(struct net_device *netdev) } static netdev_features_t ixgbe_fix_features(struct net_device *netdev, - netdev_features_t data) + netdev_features_t features) { struct ixgbe_adapter *adapter = netdev_priv(netdev); -#ifdef CONFIG_DCB - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) - data &= ~NETIF_F_HW_VLAN_RX; -#endif - - /* return error if RXHASH is being enabled when RSS is not supported */ - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) - data &= ~NETIF_F_RXHASH; - /* If Rx checksum is disabled, then RSC/LRO should also be disabled */ - if (!(data & NETIF_F_RXCSUM)) - data &= ~NETIF_F_LRO; + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_LRO; - /* Turn off LRO if not RSC capable or invalid ITR settings */ - if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) { - data &= ~NETIF_F_LRO; - } else if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) && - (adapter->rx_itr_setting != 1 && - adapter->rx_itr_setting > IXGBE_MAX_RSC_INT_RATE)) { - data &= ~NETIF_F_LRO; - e_info(probe, "rx-usecs set too low, not enabling RSC\n"); - } + /* Turn off LRO if not RSC capable */ + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)) + features &= ~NETIF_F_LRO; - return data; + return features; } static int ixgbe_set_features(struct net_device *netdev, - netdev_features_t data) + netdev_features_t features) { struct ixgbe_adapter *adapter = netdev_priv(netdev); + netdev_features_t changed = netdev->features ^ features; bool need_reset = false; - /* If Rx checksum is disabled, then RSC/LRO should also be disabled */ - if (!(data & NETIF_F_RXCSUM)) - adapter->flags &= ~IXGBE_FLAG_RX_CSUM_ENABLED; - else - adapter->flags |= IXGBE_FLAG_RX_CSUM_ENABLED; - /* Make sure RSC matches LRO, reset if change */ - if (!!(data & NETIF_F_LRO) != - !!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { - adapter->flags2 ^= IXGBE_FLAG2_RSC_ENABLED; - switch (adapter->hw.mac.type) { - case ixgbe_mac_X540: - case ixgbe_mac_82599EB: + if (!(features & NETIF_F_LRO)) { + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) need_reset = true; - break; - default: - break; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && + !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { + if (adapter->rx_itr_setting == 1 || + adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + need_reset = true; + } else if ((changed ^ features) & NETIF_F_LRO) { + e_info(probe, "rx-usecs set too low, " + "disabling RSC\n"); } } @@ -7246,27 +7685,235 @@ static int ixgbe_set_features(struct net_device *netdev, * Check if Flow Director n-tuple support was enabled or disabled. If * the state changed, we need to reset. */ - if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) { + switch (features & NETIF_F_NTUPLE) { + case NETIF_F_NTUPLE: /* turn off ATR, enable perfect filters and reset */ - if (data & NETIF_F_NTUPLE) { - adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; - adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) need_reset = true; - } - } else if (!(data & NETIF_F_NTUPLE)) { - /* turn off Flow Director, set ATR and reset */ + + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + break; + default: + /* turn off perfect filters, enable ATR and reset */ + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) + need_reset = true; + adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; - if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && - !(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) - adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; - need_reset = true; + + /* We cannot enable ATR if SR-IOV is enabled */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + break; + + /* We cannot enable ATR if we have 2 or more traffic classes */ + if (netdev_get_num_tc(netdev) > 1) + break; + + /* We cannot enable ATR if RSS is disabled */ + if (adapter->ring_feature[RING_F_RSS].limit <= 1) + break; + + /* A sample rate of 0 indicates ATR disabled */ + if (!adapter->atr_sample_rate) + break; + + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; + break; } + if (features & NETIF_F_HW_VLAN_CTAG_RX) + ixgbe_vlan_strip_enable(adapter); + else + ixgbe_vlan_strip_disable(adapter); + + if (changed & NETIF_F_RXALL) + need_reset = true; + + netdev->features = features; if (need_reset) ixgbe_do_reset(netdev); return 0; +} + +static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int err; + + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) + return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags); + + /* Hardware does not support aging addresses so if a + * ndm_state is given only allow permanent addresses + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", + ixgbe_driver_name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + u32 rar_uc_entries = IXGBE_MAX_PF_MACVLANS; + + if (netdev_uc_count(dev) < rar_uc_entries) + err = dev_uc_add_excl(dev, addr); + else + err = -ENOMEM; + } else if (is_multicast_ether_addr(addr)) { + err = dev_mc_add_excl(dev, addr); + } else { + err = -EINVAL; + } + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +static int ixgbe_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct nlattr *attr, *br_spec; + int rem; + + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) + return -EOPNOTSUPP; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; + u32 reg = 0; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) { + reg = 0; + adapter->flags2 &= ~IXGBE_FLAG2_BRIDGE_MODE_VEB; + } else if (mode == BRIDGE_MODE_VEB) { + reg = IXGBE_PFDTXGSWC_VT_LBEN; + adapter->flags2 |= IXGBE_FLAG2_BRIDGE_MODE_VEB; + } else + return -EINVAL; + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_PFDTXGSWC, reg); + + e_info(drv, "enabling bridge mode: %s\n", + mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); + } + + return 0; +} + +static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, + u32 filter_mask) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + u16 mode; + + if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) + return 0; + + if (adapter->flags2 & IXGBE_FLAG2_BRIDGE_MODE_VEB) + mode = BRIDGE_MODE_VEB; + else + mode = BRIDGE_MODE_VEPA; + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode); +} + +static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) +{ + struct ixgbe_fwd_adapter *fwd_adapter = NULL; + struct ixgbe_adapter *adapter = netdev_priv(pdev); + unsigned int limit; + int pool, err; + +#ifdef CONFIG_RPS + if (vdev->num_rx_queues != vdev->num_tx_queues) { + netdev_info(pdev, "%s: Only supports a single queue count for TX and RX\n", + vdev->name); + return ERR_PTR(-EINVAL); + } +#endif + /* Check for hardware restriction on number of rx/tx queues */ + if (vdev->num_tx_queues > IXGBE_MAX_L2A_QUEUES || + vdev->num_tx_queues == IXGBE_BAD_L2A_QUEUE) { + netdev_info(pdev, + "%s: Supports RX/TX Queue counts 1,2, and 4\n", + pdev->name); + return ERR_PTR(-EINVAL); + } + + if (((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && + adapter->num_rx_pools > IXGBE_MAX_DCBMACVLANS - 1) || + (adapter->num_rx_pools > IXGBE_MAX_MACVLANS)) + return ERR_PTR(-EBUSY); + + fwd_adapter = kcalloc(1, sizeof(struct ixgbe_fwd_adapter), GFP_KERNEL); + if (!fwd_adapter) + return ERR_PTR(-ENOMEM); + + pool = find_first_zero_bit(&adapter->fwd_bitmask, 32); + adapter->num_rx_pools++; + set_bit(pool, &adapter->fwd_bitmask); + limit = find_last_bit(&adapter->fwd_bitmask, 32); + + /* Enable VMDq flag so device will be set in VM mode */ + adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED | IXGBE_FLAG_SRIOV_ENABLED; + adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; + adapter->ring_feature[RING_F_RSS].limit = vdev->num_tx_queues; + + /* Force reinit of ring allocation with VMDQ enabled */ + err = ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev)); + if (err) + goto fwd_add_err; + fwd_adapter->pool = pool; + fwd_adapter->real_adapter = adapter; + err = ixgbe_fwd_ring_up(vdev, fwd_adapter); + if (err) + goto fwd_add_err; + netif_tx_start_all_queues(vdev); + return fwd_adapter; +fwd_add_err: + /* unwind counter and free adapter struct */ + netdev_info(pdev, + "%s: dfwd hardware acceleration failed\n", vdev->name); + clear_bit(pool, &adapter->fwd_bitmask); + adapter->num_rx_pools--; + kfree(fwd_adapter); + return ERR_PTR(err); +} + +static void ixgbe_fwd_del(struct net_device *pdev, void *priv) +{ + struct ixgbe_fwd_adapter *fwd_adapter = priv; + struct ixgbe_adapter *adapter = fwd_adapter->real_adapter; + unsigned int limit; + + clear_bit(fwd_adapter->pool, &adapter->fwd_bitmask); + adapter->num_rx_pools--; + + limit = find_last_bit(&adapter->fwd_bitmask, 32); + adapter->ring_feature[RING_F_VMDQ].limit = limit + 1; + ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter); + ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev)); + netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n", + fwd_adapter->pool, adapter->num_rx_pools, + fwd_adapter->rx_base_queue, + fwd_adapter->rx_base_queue + adapter->num_rx_queues_per_pool, + adapter->fwd_bitmask); + kfree(fwd_adapter); } static const struct net_device_ops ixgbe_netdev_ops = { @@ -7274,7 +7921,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_stop = ixgbe_close, .ndo_start_xmit = ixgbe_xmit_frame, .ndo_select_queue = ixgbe_select_queue, - .ndo_set_rx_mode = ixgbe_set_rx_mode, + .ndo_set_rx_mode = ixgbe_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = ixgbe_set_mac, .ndo_change_mtu = ixgbe_change_mtu, @@ -7284,14 +7931,19 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_do_ioctl = ixgbe_ioctl, .ndo_set_vf_mac = ixgbe_ndo_set_vf_mac, .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = ixgbe_ndo_set_vf_bw, - .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, + .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, + .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_stats64 = ixgbe_get_stats64, +#ifdef CONFIG_IXGBE_DCB .ndo_setup_tc = ixgbe_setup_tc, +#endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ixgbe_netpoll, #endif +#ifdef CONFIG_NET_RX_BUSY_POLL + .ndo_busy_poll = ixgbe_low_latency_recv, +#endif #ifdef IXGBE_FCOE .ndo_fcoe_ddp_setup = ixgbe_fcoe_ddp_get, .ndo_fcoe_ddp_target = ixgbe_fcoe_ddp_target, @@ -7303,25 +7955,109 @@ static const struct net_device_ops ixgbe_netdev_ops = { #endif /* IXGBE_FCOE */ .ndo_set_features = ixgbe_set_features, .ndo_fix_features = ixgbe_fix_features, + .ndo_fdb_add = ixgbe_ndo_fdb_add, + .ndo_bridge_setlink = ixgbe_ndo_bridge_setlink, + .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, + .ndo_dfwd_add_station = ixgbe_fwd_add, + .ndo_dfwd_del_station = ixgbe_fwd_del, }; -static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter, - const struct ixgbe_info *ii) +/** + * ixgbe_enumerate_functions - Get the number of ports this device has + * @adapter: adapter structure + * + * This function enumerates the phsyical functions co-located on a single slot, + * in order to determine how many ports a device has. This is most useful in + * determining the required GT/s of PCIe bandwidth necessary for optimal + * performance. + **/ +static inline int ixgbe_enumerate_functions(struct ixgbe_adapter *adapter) +{ + struct list_head *entry; + int physfns = 0; + + /* Some cards can not use the generic count PCIe functions method, + * because they are behind a parent switch, so we hardcode these with + * the correct number of functions. + */ + if (ixgbe_pcie_from_parent(&adapter->hw)) { + physfns = 4; + } else { + list_for_each(entry, &adapter->pdev->bus_list) { + struct pci_dev *pdev = + list_entry(entry, struct pci_dev, bus_list); + /* don't count virtual functions */ + if (!pdev->is_virtfn) + physfns++; + } + } + + return physfns; +} + +/** + * ixgbe_wol_supported - Check whether device supports WoL + * @hw: hw specific details + * @device_id: the device ID + * @subdev_id: the subsystem device ID + * + * This function is used by probe and ethtool to determine + * which devices have WoL support + * + **/ +int ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, + u16 subdevice_id) { -#ifdef CONFIG_PCI_IOV struct ixgbe_hw *hw = &adapter->hw; + u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; + int is_wol_supported = 0; - if (hw->mac.type == ixgbe_mac_82598EB) - return; + switch (device_id) { + case IXGBE_DEV_ID_82599_SFP: + /* Only these subdevices could supports WOL */ + switch (subdevice_id) { + case IXGBE_SUBDEV_ID_82599_SFP_WOL0: + case IXGBE_SUBDEV_ID_82599_560FLR: + /* only support first port */ + if (hw->bus.func != 0) + break; + case IXGBE_SUBDEV_ID_82599_SP_560FLR: + case IXGBE_SUBDEV_ID_82599_SFP: + case IXGBE_SUBDEV_ID_82599_RNDC: + case IXGBE_SUBDEV_ID_82599_ECNA_DP: + case IXGBE_SUBDEV_ID_82599_LOM_SFP: + is_wol_supported = 1; + break; + } + break; + case IXGBE_DEV_ID_82599EN_SFP: + /* Only this subdevice supports WOL */ + switch (subdevice_id) { + case IXGBE_SUBDEV_ID_82599EN_SFP_OCP1: + is_wol_supported = 1; + break; + } + break; + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + /* All except this subdevice support WOL */ + if (subdevice_id != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) + is_wol_supported = 1; + break; + case IXGBE_DEV_ID_82599_KX4: + is_wol_supported = 1; + break; + case IXGBE_DEV_ID_X540T: + case IXGBE_DEV_ID_X540T1: + /* check eeprom to see if enabled wol */ + if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && + (hw->bus.func == 0))) { + is_wol_supported = 1; + } + break; + } - /* The 82599 supports up to 64 VFs per physical function - * but this implementation limits allocation to 63 so that - * basic networking resources are still available to the - * physical function - */ - adapter->num_vfs = (max_vfs > 63) ? 63 : max_vfs; - ixgbe_enable_sriov(adapter, ii); -#endif /* CONFIG_PCI_IOV */ + return is_wol_supported; } /** @@ -7335,22 +8071,20 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter, * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur. **/ -static int __devinit ixgbe_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct ixgbe_adapter *adapter = NULL; struct ixgbe_hw *hw; const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; static int cards_found; - int i, err, pci_using_dac; + int i, err, pci_using_dac, expected_gts; + unsigned int indices = MAX_TX_QUEUES; u8 part_str[IXGBE_PBANUM_LENGTH]; - unsigned int indices = num_possible_cpus(); #ifdef IXGBE_FCOE u16 device_caps; #endif u32 eec; - u16 wol_cap; /* Catch broken hardware that put the wrong VF device ID in * the PCIe SR-IOV capability. @@ -7365,19 +8099,14 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (err) return err; - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } pci_using_dac = 0; } @@ -7395,19 +8124,15 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, pci_set_master(pdev); pci_save_state(pdev); + if (ii->mac == ixgbe_mac_82598EB) { #ifdef CONFIG_IXGBE_DCB - indices *= MAX_TRAFFIC_CLASS; + /* 8 TC w/ 4 queues per TC */ + indices = 4 * MAX_TRAFFIC_CLASS; +#else + indices = IXGBE_MAX_RSS_INDICES; #endif + } - if (ii->mac == ixgbe_mac_82598EB) - indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES); - else - indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES); - -#ifdef IXGBE_FCOE - indices += min_t(unsigned int, num_possible_cpus(), - IXGBE_MAX_FCOE_INDICES); -#endif netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); if (!netdev) { err = -ENOMEM; @@ -7423,20 +8148,16 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, adapter->pdev = pdev; hw = &adapter->hw; hw->back = adapter; - adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0)); + adapter->io_addr = hw->hw_addr; if (!hw->hw_addr) { err = -EIO; goto err_ioremap; } - for (i = 1; i <= 5; i++) { - if (pci_resource_len(pdev, i) == 0) - continue; - } - netdev->netdev_ops = &ixgbe_netdev_ops; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; @@ -7451,6 +8172,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, /* EEPROM */ memcpy(&hw->eeprom.ops, ii->eeprom_ops, sizeof(hw->eeprom.ops)); eec = IXGBE_READ_REG(hw, IXGBE_EEC); + if (ixgbe_removed(hw->hw_addr)) { + err = -EIO; + goto err_ioremap; + } /* If EEPROM is valid (bit 8 = 1), use default otherwise use bit bang */ if (!(eec & (1 << 8))) hw->eeprom.ops.read = &ixgbe_read_eeprom_bit_bang_generic; @@ -7493,6 +8218,9 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, e_crit(probe, "Fan has stopped, replace the adapter\n"); } + if (allow_unsupported_sfp) + hw->allow_unsupported_sfp = allow_unsupported_sfp; + /* reset_hw fills in the perm_addr as well */ hw->phy.reset_if_overtemp = true; err = hw->mac.ops.reset_hw(hw); @@ -7501,30 +8229,38 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, hw->mac.type == ixgbe_mac_82598EB) { err = 0; } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - e_dev_err("failed to load because an unsupported SFP+ " - "module type was detected.\n"); - e_dev_err("Reload the driver after installing a supported " - "module.\n"); + e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n"); + e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init; } else if (err) { e_dev_err("HW Init failed: %d\n", err); goto err_sw_init; } - ixgbe_probe_vf(adapter, ii); +#ifdef CONFIG_PCI_IOV + /* SR-IOV not supported on the 82598 */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB) + goto skip_sriov; + /* Mailbox */ + ixgbe_init_mbx_params_pf(hw); + memcpy(&hw->mbx.ops, ii->mbx_ops, sizeof(hw->mbx.ops)); + pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT); + ixgbe_enable_sriov(adapter); +skip_sriov: +#endif netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM; - netdev->hw_features = netdev->features; + netdev->hw_features = netdev->features | NETIF_F_HW_L2FW_DOFFLOAD; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: @@ -7537,6 +8273,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, break; } + netdev->hw_features |= NETIF_F_RXALL; + netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_IP_CSUM; @@ -7544,10 +8282,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_SG; netdev->priv_flags |= IFF_UNICAST_FLT; - - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - adapter->flags &= ~(IXGBE_FLAG_RSS_ENABLED | - IXGBE_FLAG_DCB_ENABLED); + netdev->priv_flags |= IFF_SUPP_NOFCS; #ifdef CONFIG_IXGBE_DCB netdev->dcbnl_ops = &dcbnl_ops; @@ -7555,16 +8290,24 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, #ifdef IXGBE_FCOE if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { + unsigned int fcoe_l; + if (hw->mac.ops.get_device_caps) { hw->mac.ops.get_device_caps(hw, &device_caps); if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; } - } - if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { - netdev->vlan_features |= NETIF_F_FCOE_CRC; - netdev->vlan_features |= NETIF_F_FSO; - netdev->vlan_features |= NETIF_F_FCOE_MTU; + + + fcoe_l = min_t(int, IXGBE_FCRETA_SIZE, num_online_cpus()); + adapter->ring_feature[RING_F_FCOE].limit = fcoe_l; + + netdev->features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC; + + netdev->vlan_features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC | + NETIF_F_FCOE_MTU; } #endif /* IXGBE_FCOE */ if (pci_using_dac) { @@ -7581,67 +8324,42 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) { e_dev_err("The EEPROM Checksum Is Not Valid\n"); err = -EIO; - goto err_eeprom; + goto err_sw_init; } memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); - memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len); - if (ixgbe_validate_mac_addr(netdev->perm_addr)) { + if (!is_valid_ether_addr(netdev->dev_addr)) { e_dev_err("invalid MAC address\n"); err = -EIO; - goto err_eeprom; + goto err_sw_init; } + ixgbe_mac_set_default_filter(adapter, hw->mac.perm_addr); + setup_timer(&adapter->service_timer, &ixgbe_service_timer, - (unsigned long) adapter); + (unsigned long) adapter); + if (ixgbe_removed(hw->hw_addr)) { + err = -EIO; + goto err_sw_init; + } INIT_WORK(&adapter->service_task, ixgbe_service_task); + set_bit(__IXGBE_SERVICE_INITED, &adapter->state); clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); err = ixgbe_init_interrupt_scheme(adapter); if (err) goto err_sw_init; - if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) { - netdev->hw_features &= ~NETIF_F_RXHASH; - netdev->features &= ~NETIF_F_RXHASH; - } - - /* WOL not supported for all but the following */ + /* WOL not supported for all devices */ adapter->wol = 0; - switch (pdev->device) { - case IXGBE_DEV_ID_82599_SFP: - /* Only these subdevice supports WOL */ - switch (pdev->subsystem_device) { - case IXGBE_SUBDEV_ID_82599_560FLR: - /* only support first port */ - if (hw->bus.func != 0) - break; - case IXGBE_SUBDEV_ID_82599_SFP: - adapter->wol = IXGBE_WUFC_MAG; - break; - } - break; - case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: - /* All except this subdevice support WOL */ - if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) - adapter->wol = IXGBE_WUFC_MAG; - break; - case IXGBE_DEV_ID_82599_KX4: + hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap); + hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device, + pdev->subsystem_device); + if (hw->wol_enabled) adapter->wol = IXGBE_WUFC_MAG; - break; - case IXGBE_DEV_ID_X540T: - /* Check eeprom to see if it is enabled */ - hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap); - wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; - if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || - ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && - (hw->bus.func == 0))) - adapter->wol = IXGBE_WUFC_MAG; - break; - } device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); /* save off EEPROM version number */ @@ -7650,17 +8368,23 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, /* pick up the PCI bus settings for reporting later */ hw->mac.ops.get_bus_info(hw); + if (ixgbe_pcie_from_parent(hw)) + ixgbe_get_parent_bus_info(adapter); - /* print bus type/speed/width info */ - e_dev_info("(PCI Express:%s:%s) %pM\n", - (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : - hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : - "Unknown"), - (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" : - hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" : - hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : - "Unknown"), - netdev->dev_addr); + /* calculate the expected PCIe bandwidth required for optimal + * performance. Note that some older parts will never have enough + * bandwidth due to being older generation PCIe parts. We clamp these + * parts to ensure no warning is displayed if it can't be fixed. + */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16); + break; + default: + expected_gts = ixgbe_enumerate_functions(adapter) * 10; + break; + } + ixgbe_check_minimum_link(adapter, expected_gts); err = ixgbe_read_pba_string_generic(hw, part_str, IXGBE_PBANUM_LENGTH); if (err) @@ -7668,21 +8392,15 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) e_dev_info("MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n", hw->mac.type, hw->phy.type, hw->phy.sfp_type, - part_str); + part_str); else e_dev_info("MAC: %d, PHY: %d, PBA No: %s\n", hw->mac.type, hw->phy.type, part_str); - if (hw->bus.width <= ixgbe_bus_width_pcie_x4) { - e_dev_warn("PCI-Express bandwidth available for this card is " - "not sufficient for optimal performance.\n"); - e_dev_warn("For optimal performance a x8 PCI-Express slot " - "is required.\n"); - } + e_dev_info("%pM\n", netdev->dev_addr); /* reset the hardware with the new settings */ err = hw->mac.ops.start_hw(hw); - if (err == IXGBE_ERR_EEPROM_VERSION) { /* We are running on a pre-production device, log a warning */ e_dev_warn("This device is a pre-production adapter/LOM. " @@ -7697,11 +8415,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (err) goto err_register; - /* power down the optics for multispeed fiber and 82599 SFP+ fiber */ - if (hw->mac.ops.disable_tx_laser && - ((hw->phy.multispeed_fiber) || - ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && - (hw->mac.type == ixgbe_mac_82599EB)))) + /* power down the optics for 82599 SFP+ fiber */ + if (hw->mac.ops.disable_tx_laser) hw->mac.ops.disable_tx_laser(hw); /* carrier off reporting is important to ethtool even BEFORE open */ @@ -7731,17 +8446,30 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, e_dev_info("%s\n", ixgbe_default_device_descr); cards_found++; + +#ifdef CONFIG_IXGBE_HWMON + if (ixgbe_sysfs_init(adapter)) + e_err(probe, "failed to allocate sysfs resources\n"); +#endif /* CONFIG_IXGBE_HWMON */ + + ixgbe_dbg_adapter_init(adapter); + + /* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */ + if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link) + hw->mac.ops.setup_link(hw, + IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL, + true); + return 0; err_register: ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); err_sw_init: -err_eeprom: - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) - ixgbe_disable_sriov(adapter); + ixgbe_disable_sriov(adapter); adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; - iounmap(hw->hw_addr); + iounmap(adapter->io_addr); + kfree(adapter->mac_table); err_ioremap: free_netdev(netdev); err_alloc_etherdev: @@ -7749,7 +8477,8 @@ err_alloc_etherdev: pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: - pci_disable_device(pdev); + if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + pci_disable_device(pdev); return err; } @@ -7762,14 +8491,17 @@ err_dma: * Hot-Plug event, or because the driver is going to be removed from * memory. **/ -static void __devexit ixgbe_remove(struct pci_dev *pdev) +static void ixgbe_remove(struct pci_dev *pdev) { struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; - set_bit(__IXGBE_DOWN, &adapter->state); + ixgbe_dbg_adapter_exit(adapter); + + set_bit(__IXGBE_REMOVING, &adapter->state); cancel_work_sync(&adapter->service_task); + #ifdef CONFIG_IXGBE_DCA if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED; @@ -7778,11 +8510,9 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) } #endif -#ifdef IXGBE_FCOE - if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) - ixgbe_cleanup_fcoe(adapter); - -#endif /* IXGBE_FCOE */ +#ifdef CONFIG_IXGBE_HWMON + ixgbe_sysfs_exit(adapter); +#endif /* CONFIG_IXGBE_HWMON */ /* remove the added san mac */ ixgbe_del_sanmac_netdev(netdev); @@ -7790,29 +8520,36 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); - if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - if (!(ixgbe_check_vf_assignment(adapter))) - ixgbe_disable_sriov(adapter); - else - e_dev_warn("Unloading driver while VFs are assigned " - "- VFs will not be deallocated\n"); - } - +#ifdef CONFIG_PCI_IOV + /* + * Only disable SR-IOV on unload if the user specified the now + * deprecated max_vfs module parameter. + */ + if (max_vfs) + ixgbe_disable_sriov(adapter); +#endif ixgbe_clear_interrupt_scheme(adapter); ixgbe_release_hw_control(adapter); - iounmap(adapter->hw.hw_addr); +#ifdef CONFIG_DCB + kfree(adapter->ixgbe_ieee_pfc); + kfree(adapter->ixgbe_ieee_ets); + +#endif + iounmap(adapter->io_addr); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); e_dev_info("complete\n"); + kfree(adapter->mac_table); free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); - pci_disable_device(pdev); + if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + pci_disable_device(pdev); } /** @@ -7830,6 +8567,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = adapter->netdev; #ifdef CONFIG_PCI_IOV + struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *bdev, *vfdev; u32 dw0, dw1, dw2, dw3; int vf, pos; @@ -7840,7 +8578,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, goto skip_bad_vf_detection; bdev = pdev->bus->self; - while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT)) bdev = bdev->bus->self; if (!bdev) @@ -7850,10 +8588,12 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, if (!pos) goto skip_bad_vf_detection; - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2); - pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3); + dw0 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG); + dw1 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 4); + dw2 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 8); + dw3 = ixgbe_read_pci_cfg_dword(hw, pos + PCI_ERR_HEADER_LOG + 12); + if (ixgbe_removed(hw->hw_addr)) + goto skip_bad_vf_detection; req_id = dw1 >> 16; /* On the 82599 if bit 7 of the requestor ID is set then it's a VF */ @@ -7882,11 +8622,11 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, } /* Find the pci device of the offending VF */ - vfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID, device_id, NULL); + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL); while (vfdev) { if (vfdev->devfn == (req_id & 0xFF)) break; - vfdev = pci_get_device(IXGBE_INTEL_VENDOR_ID, + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, vfdev); } /* @@ -7897,6 +8637,8 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, if (vfdev) { e_dev_err("Issuing VFLR to VF %d\n", vf); pci_write_config_dword(vfdev, 0xA8, 0x00008000); + /* Free device reference count */ + pci_dev_put(vfdev); } pci_cleanup_aer_uncorrect_error_status(pdev); @@ -7914,14 +8656,23 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, skip_bad_vf_detection: #endif /* CONFIG_PCI_IOV */ + if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state)) + return PCI_ERS_RESULT_DISCONNECT; + + rtnl_lock(); netif_device_detach(netdev); - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT; + } if (netif_running(netdev)) ixgbe_down(adapter); - pci_disable_device(pdev); + + if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state)) + pci_disable_device(pdev); + rtnl_unlock(); /* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET; @@ -7943,6 +8694,9 @@ static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev) e_err(probe, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; } else { + smp_mb__before_atomic(); + clear_bit(__IXGBE_DISABLED, &adapter->state); + adapter->hw.hw_addr = adapter->io_addr; pci_set_master(pdev); pci_restore_state(pdev); pci_save_state(pdev); @@ -7990,7 +8744,7 @@ static void ixgbe_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); } -static struct pci_error_handlers ixgbe_err_handler = { +static const struct pci_error_handlers ixgbe_err_handler = { .error_detected = ixgbe_io_error_detected, .slot_reset = ixgbe_io_slot_reset, .resume = ixgbe_io_resume, @@ -8000,12 +8754,13 @@ static struct pci_driver ixgbe_driver = { .name = ixgbe_driver_name, .id_table = ixgbe_pci_tbl, .probe = ixgbe_probe, - .remove = __devexit_p(ixgbe_remove), + .remove = ixgbe_remove, #ifdef CONFIG_PM .suspend = ixgbe_suspend, .resume = ixgbe_resume, #endif .shutdown = ixgbe_shutdown, + .sriov_configure = ixgbe_pci_sriov_configure, .err_handler = &ixgbe_err_handler }; @@ -8021,12 +8776,19 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); + ixgbe_dbg_init(); + + ret = pci_register_driver(&ixgbe_driver); + if (ret) { + ixgbe_dbg_exit(); + return ret; + } + #ifdef CONFIG_IXGBE_DCA dca_register_notify(&dca_notifier); #endif - ret = pci_register_driver(&ixgbe_driver); - return ret; + return 0; } module_init(ixgbe_init_module); @@ -8043,6 +8805,9 @@ static void __exit ixgbe_exit_module(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&ixgbe_driver); + + ixgbe_dbg_exit(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } |
