diff options
Diffstat (limited to 'drivers/net/ethernet/intel/igb/igb_main.c')
| -rw-r--r-- | drivers/net/ethernet/intel/igb/igb_main.c | 4781 |
1 files changed, 2864 insertions, 1917 deletions
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 94be6c32fa7..a9537ba7a5a 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1,29 +1,25 @@ -/******************************************************************************* - - Intel(R) Gigabit Ethernet Linux driver - Copyright(c) 2007-2012 Intel Corporation. - - This program is free software; you can redistribute it and/or modify it - under the terms and conditions of the GNU General Public License, - version 2, as published by the Free Software Foundation. - - This program is distributed in the hope it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - - The full GNU General Public License is included in this distribution in - the file called "COPYING". - - Contact Information: - e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> - Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 - -*******************************************************************************/ +/* Intel(R) Gigabit Ethernet Linux driver + * Copyright(c) 2007-2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, see <http://www.gnu.org/licenses/>. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + * Contact Information: + * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -57,24 +53,36 @@ #ifdef CONFIG_IGB_DCA #include <linux/dca.h> #endif +#include <linux/i2c.h> #include "igb.h" -#define MAJ 3 -#define MIN 2 -#define BUILD 10 +#define MAJ 5 +#define MIN 0 +#define BUILD 5 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation."; +static const char igb_copyright[] = + "Copyright (c) 2007-2014 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { [board_82575] = &e1000_82575_info, }; -static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { +static const struct pci_device_id igb_pci_tbl[] = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 }, @@ -106,18 +114,17 @@ static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { MODULE_DEVICE_TABLE(pci, igb_pci_tbl); -void igb_reset(struct igb_adapter *); static int igb_setup_all_tx_resources(struct igb_adapter *); static int igb_setup_all_rx_resources(struct igb_adapter *); static void igb_free_all_tx_resources(struct igb_adapter *); static void igb_free_all_rx_resources(struct igb_adapter *); static void igb_setup_mrqc(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); -static void __devexit igb_remove(struct pci_dev *pdev); -static void igb_init_hw_timer(struct igb_adapter *adapter); +static void igb_remove(struct pci_dev *pdev); static int igb_sw_init(struct igb_adapter *); static int igb_open(struct net_device *); static int igb_close(struct net_device *); +static void igb_configure(struct igb_adapter *); static void igb_configure_tx(struct igb_adapter *); static void igb_configure_rx(struct igb_adapter *); static void igb_clean_all_tx_rings(struct igb_adapter *); @@ -130,7 +137,7 @@ static void igb_watchdog(unsigned long); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats); + struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); static int igb_set_mac(struct net_device *, void *); static void igb_set_uta(struct igb_adapter *adapter); @@ -148,9 +155,10 @@ static bool igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); static void igb_reset_task(struct work_struct *); -static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features); -static int igb_vlan_rx_add_vid(struct net_device *, u16); -static int igb_vlan_rx_kill_vid(struct net_device *, u16); +static void igb_vlan_mode(struct net_device *netdev, + netdev_features_t features); +static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16); static void igb_restore_vlan(struct igb_adapter *); static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8); static void igb_ping_all_vfs(struct igb_adapter *); @@ -161,19 +169,22 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_find_enabled_vfs(struct igb_adapter *adapter); -static int igb_check_vf_assignment(struct igb_adapter *adapter); +static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs); #endif #ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int igb_suspend(struct device *); +#endif static int igb_resume(struct device *); #ifdef CONFIG_PM_RUNTIME static int igb_runtime_suspend(struct device *dev); @@ -187,6 +198,7 @@ static const struct dev_pm_ops igb_pm_ops = { }; #endif static void igb_shutdown(struct pci_dev *); +static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); #ifdef CONFIG_IGB_DCA static int igb_notify_dca(struct notifier_block *, unsigned long, void *); static struct notifier_block dca_notifier = { @@ -200,10 +212,9 @@ static struct notifier_block dca_notifier = { static void igb_netpoll(struct net_device *); #endif #ifdef CONFIG_PCI_IOV -static unsigned int max_vfs = 0; +static unsigned int max_vfs; module_param(max_vfs, uint, 0); -MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate " - "per physical function"); +MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function"); #endif /* CONFIG_PCI_IOV */ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, @@ -211,7 +222,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); static void igb_io_resume(struct pci_dev *); -static struct pci_error_handlers igb_err_handler = { +static const struct pci_error_handlers igb_err_handler = { .error_detected = igb_io_error_detected, .slot_reset = igb_io_slot_reset, .resume = igb_io_resume, @@ -223,11 +234,12 @@ static struct pci_driver igb_driver = { .name = igb_driver_name, .id_table = igb_pci_tbl, .probe = igb_probe, - .remove = __devexit_p(igb_remove), + .remove = igb_remove, #ifdef CONFIG_PM .driver.pm = &igb_pm_ops, #endif .shutdown = igb_shutdown, + .sriov_configure = igb_pci_sriov_configure, .err_handler = &igb_err_handler }; @@ -236,6 +248,11 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + struct igb_reg_info { u32 ofs; char *name; @@ -277,9 +294,7 @@ static const struct igb_reg_info igb_reg_info_tbl[] = { {} }; -/* - * igb_regdump - register printout routine - */ +/* igb_regdump - register printout routine */ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) { int n = 0; @@ -345,9 +360,7 @@ static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo) regs[2], regs[3]); } -/* - * igb_dump - Print registers, tx-rings and rx-rings - */ +/* igb_dump - Print registers, Tx-rings and Rx-rings */ static void igb_dump(struct igb_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -367,8 +380,7 @@ static void igb_dump(struct igb_adapter *adapter) /* Print netdevice Info */ if (netdev) { dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start " - "last_rx\n"); + pr_info("Device Name state trans_start last_rx\n"); pr_info("%-15s %016lX %016lX %016lX\n", netdev->name, netdev->state, netdev->trans_start, netdev->last_rx); } @@ -393,8 +405,8 @@ static void igb_dump(struct igb_adapter *adapter) buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp); } @@ -421,9 +433,7 @@ static void igb_dump(struct igb_adapter *adapter) pr_info("------------------------------------\n"); pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] " - "[bi->dma ] leng ntw timestamp " - "bi->skb\n"); + pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { const char *next_desc; @@ -441,21 +451,21 @@ static void igb_dump(struct igb_adapter *adapter) else next_desc = ""; - pr_info("T [0x%03X] %016llX %016llX %016llX" - " %04X %p %016llX %p%s\n", i, - le64_to_cpu(u0->a), + pr_info("T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", + i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp, buffer_info->skb, next_desc); - if (netif_msg_pktdata(adapter) && buffer_info->dma != 0) + if (netif_msg_pktdata(adapter) && buffer_info->skb) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, - 16, 1, phys_to_virt(buffer_info->dma), - buffer_info->length, true); + 16, 1, buffer_info->skb->data, + dma_unmap_len(buffer_info, len), + true); } } @@ -501,10 +511,8 @@ rx_ring_summary: pr_info("------------------------------------\n"); pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); pr_info("------------------------------------\n"); - pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] " - "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); - pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----" - "----------- [bi->skb] <-- Adv Rx Write-Back format\n"); + pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); + pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { const char *next_desc; @@ -523,32 +531,27 @@ rx_ring_summary: if (staterr & E1000_RXD_STAT_DD) { /* Descriptor Done */ - pr_info("%s[0x%03X] %016llX %016llX -------" - "--------- %p%s\n", "RWB", i, + pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", + "RWB", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - buffer_info->skb, next_desc); + next_desc); } else { - pr_info("%s[0x%03X] %016llX %016llX %016llX" - " %p%s\n", "R ", i, + pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", + "R ", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), (u64)buffer_info->dma, - buffer_info->skb, next_desc); + next_desc); - if (netif_msg_pktdata(adapter)) { - print_hex_dump(KERN_INFO, "", - DUMP_PREFIX_ADDRESS, - 16, 1, - phys_to_virt(buffer_info->dma), - IGB_RX_HDR_LEN, true); + if (netif_msg_pktdata(adapter) && + buffer_info->dma && buffer_info->page) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, - phys_to_virt( - buffer_info->page_dma + - buffer_info->page_offset), - PAGE_SIZE/2, true); + page_address(buffer_info->page) + + buffer_info->page_offset, + IGB_RX_BUFSZ, true); } } } @@ -558,36 +561,100 @@ exit: return; } +/** + * igb_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + **/ +static int igb_get_i2c_data(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = rd32(E1000_I2CPARAMS); + + return !!(i2cctl & E1000_I2C_DATA_IN); +} + +/** + * igb_set_i2c_data - Sets the I2C data bit + * @data: pointer to hardware structure + * @state: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + **/ +static void igb_set_i2c_data(void *data, int state) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = rd32(E1000_I2CPARAMS); + + if (state) + i2cctl |= E1000_I2C_DATA_OUT; + else + i2cctl &= ~E1000_I2C_DATA_OUT; + + i2cctl &= ~E1000_I2C_DATA_OE_N; + i2cctl |= E1000_I2C_CLK_OE_N; + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); + +} /** - * igb_read_clock - read raw cycle counter (to be used by time counter) - */ -static cycle_t igb_read_clock(const struct cyclecounter *tc) + * igb_set_i2c_clk - Sets the I2C SCL clock + * @data: pointer to hardware structure + * @state: state to set clock + * + * Sets the I2C clock line to state + **/ +static void igb_set_i2c_clk(void *data, int state) { - struct igb_adapter *adapter = - container_of(tc, struct igb_adapter, cycles); + struct igb_adapter *adapter = (struct igb_adapter *)data; struct e1000_hw *hw = &adapter->hw; - u64 stamp = 0; - int shift = 0; + s32 i2cctl = rd32(E1000_I2CPARAMS); - /* - * The timestamp latches on lowest register read. For the 82580 - * the lowest register is SYSTIMR instead of SYSTIML. However we never - * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it. - */ - if (hw->mac.type >= e1000_82580) { - stamp = rd32(E1000_SYSTIMR) >> 8; - shift = IGB_82580_TSYNC_SHIFT; + if (state) { + i2cctl |= E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; + } else { + i2cctl &= ~E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; } + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); +} - stamp |= (u64)rd32(E1000_SYSTIML) << shift; - stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32); - return stamp; +/** + * igb_get_i2c_clk - Gets the I2C SCL clock state + * @data: pointer to hardware structure + * + * Gets the I2C clock state + **/ +static int igb_get_i2c_clk(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = rd32(E1000_I2CPARAMS); + + return !!(i2cctl & E1000_I2C_CLK_IN); } +static const struct i2c_algo_bit_data igb_i2c_algo = { + .setsda = igb_set_i2c_data, + .setscl = igb_set_i2c_clk, + .getsda = igb_get_i2c_data, + .getscl = igb_get_i2c_clk, + .udelay = 5, + .timeout = 20, +}; + /** - * igb_get_hw_dev - return device - * used by hardware layer to print debugging information + * igb_get_hw_dev - return device + * @hw: pointer to hardware structure + * + * used by hardware layer to print debugging information **/ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) { @@ -596,17 +663,17 @@ struct net_device *igb_get_hw_dev(struct e1000_hw *hw) } /** - * igb_init_module - Driver Registration Routine + * igb_init_module - Driver Registration Routine * - * igb_init_module is the first routine called when the driver is - * loaded. All it does is register with the PCI subsystem. + * igb_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. **/ static int __init igb_init_module(void) { int ret; + pr_info("%s - version %s\n", igb_driver_string, igb_driver_version); - pr_info("%s\n", igb_copyright); #ifdef CONFIG_IGB_DCA @@ -619,10 +686,10 @@ static int __init igb_init_module(void) module_init(igb_init_module); /** - * igb_exit_module - Driver Exit Cleanup Routine + * igb_exit_module - Driver Exit Cleanup Routine * - * igb_exit_module is called just before the driver is removed - * from memory. + * igb_exit_module is called just before the driver is removed + * from memory. **/ static void __exit igb_exit_module(void) { @@ -636,11 +703,11 @@ module_exit(igb_exit_module); #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) /** - * igb_cache_ring_register - Descriptor ring to register mapping - * @adapter: board private structure to initialize + * igb_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize * - * Once we know the feature-set enabled for the device, we'll cache - * the register offset the descriptor ring is assigned to. + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. **/ static void igb_cache_ring_register(struct igb_adapter *adapter) { @@ -657,11 +724,16 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) if (adapter->vfs_allocated_count) { for (; i < adapter->rss_queues; i++) adapter->rx_ring[i]->reg_idx = rbase_offset + - Q_IDX_82576(i); + Q_IDX_82576(i); } + /* Fall through */ case e1000_82575: case e1000_82580: case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Fall through */ default: for (; i < adapter->num_rx_queues; i++) adapter->rx_ring[i]->reg_idx = rbase_offset + i; @@ -671,102 +743,26 @@ static void igb_cache_ring_register(struct igb_adapter *adapter) } } -static void igb_free_queues(struct igb_adapter *adapter) +u32 igb_rd32(struct e1000_hw *hw, u32 reg) { - int i; + struct igb_adapter *igb = container_of(hw, struct igb_adapter, hw); + u8 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr); + u32 value = 0; - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); - adapter->rx_ring[i] = NULL; - } - adapter->num_rx_queues = 0; - adapter->num_tx_queues = 0; -} + if (E1000_REMOVED(hw_addr)) + return ~value; -/** - * igb_alloc_queues - Allocate memory for all rings - * @adapter: board private structure to initialize - * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. - **/ -static int igb_alloc_queues(struct igb_adapter *adapter) -{ - struct igb_ring *ring; - int i; - int orig_node = adapter->node; + value = readl(&hw_addr[reg]); - for (i = 0; i < adapter->num_tx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); - if (!ring) - goto err; - ring->count = adapter->tx_ring_count; - ring->queue_index = i; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; - /* For 82575, context index must be unique per ring. */ - if (adapter->hw.mac.type == e1000_82575) - set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); - adapter->tx_ring[i] = ring; + /* reads should not return all F's */ + if (!(~value) && (!reg || !(~readl(hw_addr)))) { + struct net_device *netdev = igb->netdev; + hw->hw_addr = NULL; + netif_device_detach(netdev); + netdev_err(netdev, "PCIe link lost, device now detached\n"); } - /* Restore the adapter's original node */ - adapter->node = orig_node; - for (i = 0; i < adapter->num_rx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); - if (!ring) - goto err; - ring->count = adapter->rx_ring_count; - ring->queue_index = i; - ring->dev = &adapter->pdev->dev; - ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; - /* set flag indicating ring supports SCTP checksum offload */ - if (adapter->hw.mac.type >= e1000_82576) - set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); - - /* On i350, loopback VLAN packets have the tag byte-swapped. */ - if (adapter->hw.mac.type == e1000_i350) - set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); - - adapter->rx_ring[i] = ring; - } - /* Restore the adapter's original node */ - adapter->node = orig_node; - - igb_cache_ring_register(adapter); - - return 0; - -err: - /* Restore the adapter's original node */ - adapter->node = orig_node; - igb_free_queues(adapter); - - return -ENOMEM; + return value; } /** @@ -812,21 +808,21 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) switch (hw->mac.type) { case e1000_82575: /* The 82575 assigns vectors using a bitmask, which matches the - bitmask for the EICR/EIMS/EIMC registers. To assign one - or more queues to a vector, we write the appropriate bits - into the MSIXBM register for that vector. */ + * bitmask for the EICR/EIMS/EIMC registers. To assign one + * or more queues to a vector, we write the appropriate bits + * into the MSIXBM register for that vector. + */ if (rx_queue > IGB_N0_QUEUE) msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; if (tx_queue > IGB_N0_QUEUE) msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; - if (!adapter->msix_entries && msix_vector == 0) + if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0) msixbm |= E1000_EIMS_OTHER; array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); q_vector->eims_value = msixbm; break; case e1000_82576: - /* - * 82576 uses a table that essentially consists of 2 columns + /* 82576 uses a table that essentially consists of 2 columns * with 8 rows. The ordering is column-major so we use the * lower 3 bits as the row index, and the 4th bit as the * column offset. @@ -843,8 +839,10 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) break; case e1000_82580: case e1000_i350: - /* - * On 82580 and newer adapters the scheme is similar to 82576 + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* On 82580 and newer adapters the scheme is similar to 82576 * however instead of ordering column-major we have things * ordered row-major. So we traverse the table by using * bit 0 as the column offset, and the remaining bits as the @@ -873,10 +871,11 @@ static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) } /** - * igb_configure_msix - Configure MSI-X hardware + * igb_configure_msix - Configure MSI-X hardware + * @adapter: board private structure to initialize * - * igb_configure_msix sets up the hardware to properly - * generate MSI-X interrupts. + * igb_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. **/ static void igb_configure_msix(struct igb_adapter *adapter) { @@ -900,8 +899,7 @@ static void igb_configure_msix(struct igb_adapter *adapter) wr32(E1000_CTRL_EXT, tmp); /* enable msix_other interrupt */ - array_wr32(E1000_MSIXBM(0), vector++, - E1000_EIMS_OTHER); + array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); adapter->eims_other = E1000_EIMS_OTHER; break; @@ -909,11 +907,15 @@ static void igb_configure_msix(struct igb_adapter *adapter) case e1000_82576: case e1000_82580: case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: /* Turn on MSI-X capability first, or our settings - * won't stick. And it will take days to debug. */ + * won't stick. And it will take days to debug. + */ wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | - E1000_GPIE_PBA | E1000_GPIE_EIAME | - E1000_GPIE_NSICR); + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); /* enable msix_other interrupt */ adapter->eims_other = 1 << vector; @@ -935,26 +937,28 @@ static void igb_configure_msix(struct igb_adapter *adapter) } /** - * igb_request_msix - Initialize MSI-X interrupts + * igb_request_msix - Initialize MSI-X interrupts + * @adapter: board private structure to initialize * - * igb_request_msix allocates MSI-X vectors and requests interrupts from the - * kernel. + * igb_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. **/ static int igb_request_msix(struct igb_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw; - int i, err = 0, vector = 0; + int i, err = 0, vector = 0, free_vector = 0; err = request_irq(adapter->msix_entries[vector].vector, - igb_msix_other, 0, netdev->name, adapter); + igb_msix_other, 0, netdev->name, adapter); if (err) - goto out; - vector++; + goto err_out; for (i = 0; i < adapter->num_q_vectors; i++) { struct igb_q_vector *q_vector = adapter->q_vector[i]; + vector++; + q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); if (q_vector->rx.ring && q_vector->tx.ring) @@ -970,77 +974,140 @@ static int igb_request_msix(struct igb_adapter *adapter) sprintf(q_vector->name, "%s-unused", netdev->name); err = request_irq(adapter->msix_entries[vector].vector, - igb_msix_ring, 0, q_vector->name, - q_vector); + igb_msix_ring, 0, q_vector->name, + q_vector); if (err) - goto out; - vector++; + goto err_free; } igb_configure_msix(adapter); return 0; -out: + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: return err; } +/** + * igb_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. + **/ +static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + adapter->q_vector[v_idx] = NULL; + + /* igb_get_stats64() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + kfree_rcu(q_vector, rcu); +} + +/** + * igb_reset_q_vector - Reset config for interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be reset + * + * If NAPI is enabled it will delete any references to the + * NAPI struct. This is preparation for igb_free_q_vector. + **/ +static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + /* Coming from igb_set_interrupt_capability, the vectors are not yet + * allocated. So, q_vector is NULL so we should stop here. + */ + if (!q_vector) + return; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; + + netif_napi_del(&q_vector->napi); + +} + static void igb_reset_interrupt_capability(struct igb_adapter *adapter) { - if (adapter->msix_entries) { + int v_idx = adapter->num_q_vectors; + + if (adapter->flags & IGB_FLAG_HAS_MSIX) pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + else if (adapter->flags & IGB_FLAG_HAS_MSI) pci_disable_msi(adapter->pdev); - } + + while (v_idx--) + igb_reset_q_vector(adapter, v_idx); } /** - * igb_free_q_vectors - Free memory allocated for interrupt vectors - * @adapter: board private structure to initialize + * igb_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize * - * This function frees the memory allocated to the q_vectors. In addition if - * NAPI is enabled it will delete any references to the NAPI struct prior - * to freeing the q_vector. + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ static void igb_free_q_vectors(struct igb_adapter *adapter) { - int v_idx; + int v_idx = adapter->num_q_vectors; - for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; - adapter->q_vector[v_idx] = NULL; - if (!q_vector) - continue; - netif_napi_del(&q_vector->napi); - kfree(q_vector); - } + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; adapter->num_q_vectors = 0; + + while (v_idx--) { + igb_reset_q_vector(adapter, v_idx); + igb_free_q_vector(adapter, v_idx); + } } /** - * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * @adapter: board private structure to initialize * - * This function resets the device so that it has 0 rx queues, tx queues, and - * MSI-X interrupts allocated. + * This function resets the device so that it has 0 Rx queues, Tx queues, and + * MSI-X interrupts allocated. */ static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) { - igb_free_queues(adapter); igb_free_q_vectors(adapter); igb_reset_interrupt_capability(adapter); } /** - * igb_set_interrupt_capability - set MSI or MSI-X if supported + * igb_set_interrupt_capability - set MSI or MSI-X if supported + * @adapter: board private structure to initialize + * @msix: boolean value of MSIX capability * - * Attempt to configure interrupts using the best available - * capabilities of the hardware and kernel. + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. **/ -static int igb_set_interrupt_capability(struct igb_adapter *adapter) +static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) { int err; int numvecs, i; + if (!msix) + goto msi_only; + adapter->flags |= IGB_FLAG_HAS_MSIX; + /* Number of supported queues. */ adapter->num_rx_queues = adapter->rss_queues; if (adapter->vfs_allocated_count) @@ -1048,10 +1115,10 @@ static int igb_set_interrupt_capability(struct igb_adapter *adapter) else adapter->num_tx_queues = adapter->rss_queues; - /* start with one vector for every rx queue */ + /* start with one vector for every Rx queue */ numvecs = adapter->num_rx_queues; - /* if tx handler is separate add 1 for every tx queue */ + /* if Tx handler is separate add 1 for every Tx queue */ if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) numvecs += adapter->num_tx_queues; @@ -1060,24 +1127,21 @@ static int igb_set_interrupt_capability(struct igb_adapter *adapter) /* add 1 vector for link status interrupts */ numvecs++; - adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry), - GFP_KERNEL); - if (!adapter->msix_entries) - goto msi_only; - for (i = 0; i < numvecs; i++) adapter->msix_entries[i].entry = i; - err = pci_enable_msix(adapter->pdev, - adapter->msix_entries, - numvecs); - if (err == 0) - goto out; + err = pci_enable_msix_range(adapter->pdev, + adapter->msix_entries, + numvecs, + numvecs); + if (err > 0) + return; igb_reset_interrupt_capability(adapter); /* If we can't do MSI-X, try MSI */ msi_only: + adapter->flags &= ~IGB_FLAG_HAS_MSIX; #ifdef CONFIG_PCI_IOV /* disable SR-IOV for non MSI-X configurations */ if (adapter->vf_data) { @@ -1102,135 +1166,216 @@ msi_only: adapter->num_q_vectors = 1; if (!pci_enable_msi(adapter->pdev)) adapter->flags |= IGB_FLAG_HAS_MSI; -out: - /* Notify the stack of the (possibly) reduced queue counts. */ - netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues); - return netif_set_real_num_rx_queues(adapter->netdev, - adapter->num_rx_queues); +} + +static void igb_add_ring(struct igb_ring *ring, + struct igb_ring_container *head) +{ + head->ring = ring; + head->count++; } /** - * igb_alloc_q_vectors - Allocate memory for interrupt vectors - * @adapter: board private structure to initialize + * igb_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate * - * We allocate one q_vector per queue interrupt. If allocation fails we - * return -ENOMEM. + * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int igb_alloc_q_vectors(struct igb_adapter *adapter) +static int igb_alloc_q_vector(struct igb_adapter *adapter, + int v_count, int v_idx, + int txr_count, int txr_idx, + int rxr_count, int rxr_idx) { struct igb_q_vector *q_vector; - struct e1000_hw *hw = &adapter->hw; - int v_idx; - int orig_node = adapter->node; - - for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - if ((adapter->num_q_vectors == (adapter->num_rx_queues + - adapter->num_tx_queues)) && - (adapter->num_rx_queues == v_idx)) - adapter->node = orig_node; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, - adapter->node); - if (!q_vector) - q_vector = kzalloc(sizeof(struct igb_q_vector), - GFP_KERNEL); - if (!q_vector) - goto err_out; - q_vector->adapter = adapter; - q_vector->itr_register = hw->hw_addr + E1000_EITR(0); - q_vector->itr_val = IGB_START_ITR; - netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); - adapter->q_vector[v_idx] = q_vector; - } - /* Restore the adapter's original node */ - adapter->node = orig_node; + struct igb_ring *ring; + int ring_count, size; - return 0; + /* igb only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; -err_out: - /* Restore the adapter's original node */ - adapter->node = orig_node; - igb_free_q_vectors(adapter); - return -ENOMEM; -} + ring_count = txr_count + rxr_count; + size = sizeof(struct igb_q_vector) + + (sizeof(struct igb_ring) * ring_count); -static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter, - int ring_idx, int v_idx) -{ - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + /* allocate q_vector and rings */ + q_vector = adapter->q_vector[v_idx]; + if (!q_vector) + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; - q_vector->rx.ring = adapter->rx_ring[ring_idx]; - q_vector->rx.ring->q_vector = q_vector; - q_vector->rx.count++; - q_vector->itr_val = adapter->rx_itr_setting; - if (q_vector->itr_val && q_vector->itr_val <= 3) - q_vector->itr_val = IGB_START_ITR; -} + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igb_poll, 64); -static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, - int ring_idx, int v_idx) -{ - struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; - q_vector->tx.ring = adapter->tx_ring[ring_idx]; - q_vector->tx.ring->q_vector = q_vector; - q_vector->tx.count++; - q_vector->itr_val = adapter->tx_itr_setting; + /* initialize work limits */ q_vector->tx.work_limit = adapter->tx_work_limit; - if (q_vector->itr_val && q_vector->itr_val <= 3) - q_vector->itr_val = IGB_START_ITR; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_val = IGB_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* intialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igb_add_ring(ring, &q_vector->tx); + + /* For 82575, context index must be unique per ring. */ + if (adapter->hw.mac.type == e1000_82575) + set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + u64_stats_init(&ring->tx_syncp); + u64_stats_init(&ring->tx_syncp2); + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igb_add_ring(ring, &q_vector->rx); + + /* set flag indicating ring supports SCTP checksum offload */ + if (adapter->hw.mac.type >= e1000_82576) + set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); + + /* On i350, i354, i210, and i211, loopback VLAN packets + * have the tag byte-swapped. + */ + if (adapter->hw.mac.type >= e1000_i350) + set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + u64_stats_init(&ring->rx_syncp); + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; } + /** - * igb_map_ring_to_vector - maps allocated queues to vectors + * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize * - * This function maps the recently allocated queues to vectors. + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. **/ -static int igb_map_ring_to_vector(struct igb_adapter *adapter) +static int igb_alloc_q_vectors(struct igb_adapter *adapter) { - int i; - int v_idx = 0; + int q_vectors = adapter->num_q_vectors; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; - if ((adapter->num_q_vectors < adapter->num_rx_queues) || - (adapter->num_q_vectors < adapter->num_tx_queues)) - return -ENOMEM; + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); - if (adapter->num_q_vectors >= - (adapter->num_rx_queues + adapter->num_tx_queues)) { - for (i = 0; i < adapter->num_rx_queues; i++) - igb_map_rx_ring_to_vector(adapter, i, v_idx++); - for (i = 0; i < adapter->num_tx_queues; i++) - igb_map_tx_ring_to_vector(adapter, i, v_idx++); - } else { - for (i = 0; i < adapter->num_rx_queues; i++) { - if (i < adapter->num_tx_queues) - igb_map_tx_ring_to_vector(adapter, i, v_idx); - igb_map_rx_ring_to_vector(adapter, i, v_idx++); + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; } - for (; i < adapter->num_tx_queues; i++) - igb_map_tx_ring_to_vector(adapter, i, v_idx++); } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); + + return -ENOMEM; } /** - * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * @adapter: board private structure to initialize + * @msix: boolean value of MSIX capability * - * This function initializes the interrupts and allocates all of the queues. + * This function initializes the interrupts and allocates all of the queues. **/ -static int igb_init_interrupt_scheme(struct igb_adapter *adapter) +static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) { struct pci_dev *pdev = adapter->pdev; int err; - err = igb_set_interrupt_capability(adapter); - if (err) - return err; + igb_set_interrupt_capability(adapter, msix); err = igb_alloc_q_vectors(adapter); if (err) { @@ -1238,34 +1383,21 @@ static int igb_init_interrupt_scheme(struct igb_adapter *adapter) goto err_alloc_q_vectors; } - err = igb_alloc_queues(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - - err = igb_map_ring_to_vector(adapter); - if (err) { - dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n"); - goto err_map_queues; - } - + igb_cache_ring_register(adapter); return 0; -err_map_queues: - igb_free_queues(adapter); -err_alloc_queues: - igb_free_q_vectors(adapter); + err_alloc_q_vectors: igb_reset_interrupt_capability(adapter); return err; } /** - * igb_request_irq - initialize interrupts + * igb_request_irq - initialize interrupts + * @adapter: board private structure to initialize * - * Attempts to configure interrupts using the best available - * capabilities of the hardware and kernel. + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. **/ static int igb_request_irq(struct igb_adapter *adapter) { @@ -1273,34 +1405,22 @@ static int igb_request_irq(struct igb_adapter *adapter) struct pci_dev *pdev = adapter->pdev; int err = 0; - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { err = igb_request_msix(adapter); if (!err) goto request_done; /* fall back to MSI */ - igb_clear_interrupt_scheme(adapter); - if (!pci_enable_msi(pdev)) - adapter->flags |= IGB_FLAG_HAS_MSI; igb_free_all_tx_resources(adapter); igb_free_all_rx_resources(adapter); - adapter->num_tx_queues = 1; - adapter->num_rx_queues = 1; - adapter->num_q_vectors = 1; - err = igb_alloc_q_vectors(adapter); - if (err) { - dev_err(&pdev->dev, - "Unable to allocate memory for vectors\n"); - goto request_done; - } - err = igb_alloc_queues(adapter); - if (err) { - dev_err(&pdev->dev, - "Unable to allocate memory for queues\n"); - igb_free_q_vectors(adapter); + + igb_clear_interrupt_scheme(adapter); + err = igb_init_interrupt_scheme(adapter, false); + if (err) goto request_done; - } + igb_setup_all_tx_resources(adapter); igb_setup_all_rx_resources(adapter); + igb_configure(adapter); } igb_assign_vector(adapter->q_vector[0], 0); @@ -1329,7 +1449,7 @@ request_done: static void igb_free_irq(struct igb_adapter *adapter) { - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { int vector = 0, i; free_irq(adapter->msix_entries[vector++].vector, adapter); @@ -1343,20 +1463,20 @@ static void igb_free_irq(struct igb_adapter *adapter) } /** - * igb_irq_disable - Mask off interrupt generation on the NIC - * @adapter: board private structure + * igb_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure **/ static void igb_irq_disable(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - /* - * we need to be careful when disabling interrupts. The VFs are also + /* we need to be careful when disabling interrupts. The VFs are also * mapped into these registers and so clearing the bits can cause * issues on the VF drivers so we only need to clear what we set */ - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 regval = rd32(E1000_EIAM); + wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask); wr32(E1000_EIMC, adapter->eims_enable_mask); regval = rd32(E1000_EIAC); @@ -1366,8 +1486,9 @@ static void igb_irq_disable(struct igb_adapter *adapter) wr32(E1000_IAM, 0); wr32(E1000_IMC, ~0); wrfl(); - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { int i; + for (i = 0; i < adapter->num_q_vectors; i++) synchronize_irq(adapter->msix_entries[i].vector); } else { @@ -1376,16 +1497,17 @@ static void igb_irq_disable(struct igb_adapter *adapter) } /** - * igb_irq_enable - Enable default interrupt generation settings - * @adapter: board private structure + * igb_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure **/ static void igb_irq_enable(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - if (adapter->msix_entries) { + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; u32 regval = rd32(E1000_EIAC); + wr32(E1000_EIAC, regval | adapter->eims_enable_mask); regval = rd32(E1000_EIAM); wr32(E1000_EIAM, regval | adapter->eims_enable_mask); @@ -1426,13 +1548,12 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) } /** - * igb_release_hw_control - release control of the h/w to f/w - * @adapter: address of board private structure - * - * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that the - * driver is no longer loaded. + * igb_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure * + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. **/ static void igb_release_hw_control(struct igb_adapter *adapter) { @@ -1446,13 +1567,12 @@ static void igb_release_hw_control(struct igb_adapter *adapter) } /** - * igb_get_hw_control - get control of the h/w from f/w - * @adapter: address of board private structure - * - * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. - * For ASF and Pass Through versions of f/w this means that - * the driver is loaded. + * igb_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure * + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. **/ static void igb_get_hw_control(struct igb_adapter *adapter) { @@ -1466,8 +1586,8 @@ static void igb_get_hw_control(struct igb_adapter *adapter) } /** - * igb_configure - configure the hardware for RX and TX - * @adapter: private board structure + * igb_configure - configure the hardware for RX and TX + * @adapter: private board structure **/ static void igb_configure(struct igb_adapter *adapter) { @@ -1490,7 +1610,8 @@ static void igb_configure(struct igb_adapter *adapter) /* call igb_desc_unused which always leaves * at least 1 descriptor unused to make sure - * next_to_use != next_to_clean */ + * next_to_use != next_to_clean + */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = adapter->rx_ring[i]; igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); @@ -1498,21 +1619,22 @@ static void igb_configure(struct igb_adapter *adapter) } /** - * igb_power_up_link - Power up the phy/serdes link - * @adapter: address of board private structure + * igb_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure **/ void igb_power_up_link(struct igb_adapter *adapter) { + igb_reset_phy(&adapter->hw); + if (adapter->hw.phy.media_type == e1000_media_type_copper) igb_power_up_phy_copper(&adapter->hw); else igb_power_up_serdes_link_82575(&adapter->hw); - igb_reset_phy(&adapter->hw); } /** - * igb_power_down_link - Power down the phy/serdes link - * @adapter: address of board private structure + * igb_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure */ static void igb_power_down_link(struct igb_adapter *adapter) { @@ -1523,8 +1645,75 @@ static void igb_power_down_link(struct igb_adapter *adapter) } /** - * igb_up - Open the interface and prepare it to handle traffic - * @adapter: board private structure + * Detect and switch function for Media Auto Sense + * @adapter: address of the board private structure + **/ +static void igb_check_swap_media(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext, connsw; + bool swap_now = false; + + ctrl_ext = rd32(E1000_CTRL_EXT); + connsw = rd32(E1000_CONNSW); + + /* need to live swap if current media is copper and we have fiber/serdes + * to go to. + */ + + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { + swap_now = true; + } else if (!(connsw & E1000_CONNSW_SERDESD)) { + /* copper signal takes time to appear */ + if (adapter->copper_tries < 4) { + adapter->copper_tries++; + connsw |= E1000_CONNSW_AUTOSENSE_CONF; + wr32(E1000_CONNSW, connsw); + return; + } else { + adapter->copper_tries = 0; + if ((connsw & E1000_CONNSW_PHYSD) && + (!(connsw & E1000_CONNSW_PHY_PDN))) { + swap_now = true; + connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; + wr32(E1000_CONNSW, connsw); + } + } + } + + if (!swap_now) + return; + + switch (hw->phy.media_type) { + case e1000_media_type_copper: + netdev_info(adapter->netdev, + "MAS: changing media to fiber/serdes\n"); + ctrl_ext |= + E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + adapter->copper_tries = 0; + break; + case e1000_media_type_internal_serdes: + case e1000_media_type_fiber: + netdev_info(adapter->netdev, + "MAS: changing media to copper\n"); + ctrl_ext &= + ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + break; + default: + /* shouldn't get here during regular operation */ + netdev_err(adapter->netdev, + "AMS: Invalid media type found, returning\n"); + break; + } + wr32(E1000_CTRL_EXT, ctrl_ext); +} + +/** + * igb_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure **/ int igb_up(struct igb_adapter *adapter) { @@ -1539,7 +1728,7 @@ int igb_up(struct igb_adapter *adapter) for (i = 0; i < adapter->num_q_vectors; i++) napi_enable(&(adapter->q_vector[i]->napi)); - if (adapter->msix_entries) + if (adapter->flags & IGB_FLAG_HAS_MSIX) igb_configure_msix(adapter); else igb_assign_vector(adapter->q_vector[0], 0); @@ -1551,6 +1740,7 @@ int igb_up(struct igb_adapter *adapter) /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -1561,6 +1751,10 @@ int igb_up(struct igb_adapter *adapter) hw->mac.get_link_status = 1; schedule_work(&adapter->watchdog_task); + if ((adapter->flags & IGB_FLAG_EEE) && + (!hw->dev_spec._82575.eee_disable)) + adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; + return 0; } @@ -1572,7 +1766,8 @@ void igb_down(struct igb_adapter *adapter) int i; /* signal that we're down so the interrupt handler does not - * reschedule our watchdog timer */ + * reschedule our watchdog timer + */ set_bit(__IGB_DOWN, &adapter->state); /* disable receives in the hardware */ @@ -1588,12 +1783,17 @@ void igb_down(struct igb_adapter *adapter) wr32(E1000_TCTL, tctl); /* flush both disables and wait for them to finish */ wrfl(); - msleep(10); + usleep_range(10000, 11000); - for (i = 0; i < adapter->num_q_vectors; i++) + igb_irq_disable(adapter); + + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + + for (i = 0; i < adapter->num_q_vectors; i++) { + napi_synchronize(&(adapter->q_vector[i]->napi)); napi_disable(&(adapter->q_vector[i]->napi)); + } - igb_irq_disable(adapter); del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); @@ -1623,26 +1823,57 @@ void igb_reinit_locked(struct igb_adapter *adapter) { WARN_ON(in_interrupt()); while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); igb_down(adapter); igb_up(adapter); clear_bit(__IGB_RESETTING, &adapter->state); } +/** igb_enable_mas - Media Autosense re-enable after swap + * + * @adapter: adapter struct + **/ +static s32 igb_enable_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 connsw; + s32 ret_val = 0; + + connsw = rd32(E1000_CONNSW); + if (!(hw->phy.media_type == e1000_media_type_copper)) + return ret_val; + + /* configure for SerDes media detect */ + if (!(connsw & E1000_CONNSW_SERDESD)) { + connsw |= E1000_CONNSW_ENRGSRC; + connsw |= E1000_CONNSW_AUTOSENSE_EN; + wr32(E1000_CONNSW, connsw); + wrfl(); + } else if (connsw & E1000_CONNSW_SERDESD) { + /* already SerDes, no need to enable anything */ + return ret_val; + } else { + netdev_info(adapter->netdev, + "MAS: Unable to configure feature, disabling..\n"); + adapter->flags &= ~IGB_FLAG_MAS_ENABLE; + } + return ret_val; +} + void igb_reset(struct igb_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; struct e1000_mac_info *mac = &hw->mac; struct e1000_fc_info *fc = &hw->fc; - u32 pba = 0, tx_space, min_tx_space, min_rx_space; - u16 hwm; + u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. */ switch (mac->type) { case e1000_i350: + case e1000_i354: case e1000_82580: pba = rd32(E1000_RXPBS); pba = igb_rxpbs_adjust_82580(pba); @@ -1652,6 +1883,8 @@ void igb_reset(struct igb_adapter *adapter) pba &= E1000_RXPBS_SIZE_MASK_82576; break; case e1000_82575: + case e1000_i210: + case e1000_i211: default: pba = E1000_PBA_34K; break; @@ -1667,14 +1900,16 @@ void igb_reset(struct igb_adapter *adapter) * rounded up to the next 1KB and expressed in KB. Likewise, * the Rx FIFO should be large enough to accommodate at least * one full receive packet and is similarly rounded up and - * expressed in KB. */ + * expressed in KB. + */ pba = rd32(E1000_PBA); /* upper 16 bits has Tx packet buffer allocation size in KB */ tx_space = pba >> 16; /* lower 16 bits has Rx packet buffer allocation size in KB */ pba &= 0xffff; - /* the tx fifo also stores 16 bytes of information about the tx - * but don't include ethernet FCS because hardware appends it */ + /* the Tx fifo also stores 16 bytes of information about the Tx + * but don't include ethernet FCS because hardware appends it + */ min_tx_space = (adapter->max_frame_size + sizeof(union e1000_adv_tx_desc) - ETH_FCS_LEN) * 2; @@ -1687,13 +1922,15 @@ void igb_reset(struct igb_adapter *adapter) /* If current Tx allocation is less than the min Tx FIFO size, * and the min Tx FIFO size is less than the current Rx FIFO - * allocation, take space away from current Rx allocation */ + * allocation, take space away from current Rx allocation + */ if (tx_space < min_tx_space && ((min_tx_space - tx_space) < pba)) { pba = pba - (min_tx_space - tx_space); - /* if short on rx space, rx wins and must trump tx - * adjustment */ + /* if short on Rx space, Rx wins and must trump Tx + * adjustment + */ if (pba < min_rx_space) pba = min_rx_space; } @@ -1705,11 +1942,12 @@ void igb_reset(struct igb_adapter *adapter) * (or the size used for early receive) above it in the Rx FIFO. * Set it to the lower of: * - 90% of the Rx FIFO size, or - * - the full Rx FIFO size minus one full frame */ + * - the full Rx FIFO size minus one full frame + */ hwm = min(((pba << 10) * 9 / 10), ((pba << 10) - 2 * adapter->max_frame_size)); - fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ + fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ fc->low_water = fc->high_water - 16; fc->pause_time = 0xFFFF; fc->send_xon = 1; @@ -1718,6 +1956,7 @@ void igb_reset(struct igb_adapter *adapter) /* disable receive for all VFs and wait one second */ if (adapter->vfs_allocated_count) { int i; + for (i = 0 ; i < adapter->vfs_allocated_count; i++) adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; @@ -1733,10 +1972,53 @@ void igb_reset(struct igb_adapter *adapter) hw->mac.ops.reset_hw(hw); wr32(E1000_WUC, 0); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + /* need to resetup here after media swap */ + adapter->ei.get_invariants(hw); + adapter->flags &= ~IGB_FLAG_MEDIA_RESET; + } + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (igb_enable_mas(adapter)) + dev_err(&pdev->dev, + "Error enabling Media Auto Sense\n"); + } if (hw->mac.ops.init_hw(hw)) dev_err(&pdev->dev, "Hardware Error\n"); + /* Flow control settings reset on hardware reset, so guarantee flow + * control is off when forcing speed. + */ + if (!hw->mac.autoneg) + igb_force_mac_fc(hw); + igb_init_dmac(adapter, pba); +#ifdef CONFIG_IGB_HWMON + /* Re-initialize the thermal sensor on i350 devices. */ + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (mac->type == e1000_i350 && hw->bus.func == 0) { + /* If present, re-initialize the external thermal sensor + * interface. + */ + if (adapter->ets) + mac->ops.init_thermal_sensor_thresh(hw); + } + } +#endif + /* Re-establish EEE setting */ + if (hw->phy.media_type == e1000_media_type_copper) { + switch (mac->type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + igb_set_eee_i350(hw); + break; + case e1000_i354: + igb_set_eee_i354(hw); + break; + default: + break; + } + } if (!netif_running(adapter->netdev)) igb_power_down_link(adapter); @@ -1745,20 +2027,22 @@ void igb_reset(struct igb_adapter *adapter) /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); + /* Re-enable PTP, where applicable. */ + igb_ptp_reset(adapter); + igb_get_phy_info(hw); } static netdev_features_t igb_fix_features(struct net_device *netdev, netdev_features_t features) { - /* - * Since there is no support for separate rx/tx vlan accel - * enable/disable make sure tx flag is always in same state as rx. + /* Since there is no support for separate Rx/Tx vlan accel + * enable/disable make sure Tx flag is always in same state as Rx. */ - if (features & NETIF_F_HW_VLAN_RX) - features |= NETIF_F_HW_VLAN_TX; + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; else - features &= ~NETIF_F_HW_VLAN_TX; + features &= ~NETIF_F_HW_VLAN_CTAG_TX; return features; } @@ -1767,10 +2051,21 @@ static int igb_set_features(struct net_device *netdev, netdev_features_t features) { netdev_features_t changed = netdev->features ^ features; + struct igb_adapter *adapter = netdev_priv(netdev); - if (changed & NETIF_F_HW_VLAN_RX) + if (changed & NETIF_F_HW_VLAN_CTAG_RX) igb_vlan_mode(netdev, features); + if (!(changed & NETIF_F_RXALL)) + return 0; + + netdev->features = features; + + if (netif_running(netdev)) + igb_reinit_locked(adapter); + else + igb_reset(adapter); + return 0; } @@ -1789,7 +2084,8 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, .ndo_set_vf_mac = igb_ndo_set_vf_mac, .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, - .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_rate = igb_ndo_set_vf_bw, + .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, .ndo_get_vf_config = igb_ndo_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = igb_netpoll, @@ -1799,18 +2095,143 @@ static const struct net_device_ops igb_netdev_ops = { }; /** - * igb_probe - Device Initialization Routine - * @pdev: PCI device information struct - * @ent: entry in igb_pci_tbl + * igb_set_fw_version - Configure version string for ethtool + * @adapter: adapter struct + **/ +void igb_set_fw_version(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_fw_version fw; + + igb_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (!(igb_get_flash_presence_i210(hw))) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, + fw.invm_img_type); + break; + } + /* fall through */ + default: + /* if option is rom valid, display its version too */ + if (fw.or_valid) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else if (fw.etrack_id != 0X0000) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, fw.etrack_id); + } else { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d.%d", + fw.eep_major, fw.eep_minor, fw.eep_build); + } + break; + } +} + +/** + * igb_init_mas - init Media Autosense feature if enabled in the NVM * - * Returns 0 on success, negative on failure + * @adapter: adapter struct + **/ +static void igb_init_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 eeprom_data; + + hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data); + switch (hw->bus.func) { + case E1000_FUNC_0: + if (eeprom_data & IGB_MAS_ENABLE_0) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_1: + if (eeprom_data & IGB_MAS_ENABLE_1) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_2: + if (eeprom_data & IGB_MAS_ENABLE_2) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + case E1000_FUNC_3: + if (eeprom_data & IGB_MAS_ENABLE_3) { + adapter->flags |= IGB_FLAG_MAS_ENABLE; + netdev_info(adapter->netdev, + "MAS: Enabling Media Autosense for port %d\n", + hw->bus.func); + } + break; + default: + /* Shouldn't get here */ + netdev_err(adapter->netdev, + "MAS: Invalid port configuration, returning\n"); + break; + } +} + +/** + * igb_init_i2c - Init I2C interface + * @adapter: pointer to adapter structure + **/ +static s32 igb_init_i2c(struct igb_adapter *adapter) +{ + s32 status = 0; + + /* I2C interface supported on i350 devices */ + if (adapter->hw.mac.type != e1000_i350) + return 0; + + /* Initialize the i2c bus which is controlled by the registers. + * This bus will use the i2c_algo_bit structue that implements + * the protocol through toggling of the 4 bits in the register. + */ + adapter->i2c_adap.owner = THIS_MODULE; + adapter->i2c_algo = igb_i2c_algo; + adapter->i2c_algo.data = adapter; + adapter->i2c_adap.algo_data = &adapter->i2c_algo; + adapter->i2c_adap.dev.parent = &adapter->pdev->dev; + strlcpy(adapter->i2c_adap.name, "igb BB", + sizeof(adapter->i2c_adap.name)); + status = i2c_bit_add_bus(&adapter->i2c_adap); + return status; +} + +/** + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl + * + * Returns 0 on success, negative on failure * - * igb_probe initializes an adapter identified by a pci_dev structure. - * The OS initialization, configuring of the adapter private structure, - * and a hardware reset occur. + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. **/ -static int __devinit igb_probe(struct pci_dev *pdev, - const struct pci_device_id *ent) +static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct net_device *netdev; struct igb_adapter *adapter; @@ -1819,9 +2240,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, s32 ret_val; static int global_quad_port_a; /* global quad port a indication */ const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; - unsigned long mmio_start, mmio_len; int err, pci_using_dac; - u16 eeprom_apme_mask = IGB_EEPROM_APME; u8 part_str[E1000_PBANUM_LENGTH]; /* Catch broken hardware that put the wrong VF device ID in @@ -1829,7 +2248,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, */ if (pdev->is_virtfn) { WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n", - pci_name(pdev), pdev->vendor, pdev->device); + pci_name(pdev), pdev->vendor, pdev->device); return -EINVAL; } @@ -1838,26 +2257,21 @@ static int __devinit igb_probe(struct pci_dev *pdev, return err; pci_using_dac = 0; - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (!err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); - if (!err) - pci_using_dac = 1; + pci_using_dac = 1; } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "No usable DMA " - "configuration, aborting\n"); - goto err_dma; - } + dev_err(&pdev->dev, + "No usable DMA configuration, aborting\n"); + goto err_dma; } } err = pci_request_selected_regions(pdev, pci_select_bars(pdev, - IORESOURCE_MEM), - igb_driver_name); + IORESOURCE_MEM), + igb_driver_name); if (err) goto err_pci_reg; @@ -1880,13 +2294,10 @@ static int __devinit igb_probe(struct pci_dev *pdev, adapter->pdev = pdev; hw = &adapter->hw; hw->back = adapter; - adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE; - - mmio_start = pci_resource_start(pdev, 0); - mmio_len = pci_resource_len(pdev, 0); + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); err = -EIO; - hw->hw_addr = ioremap(mmio_start, mmio_len); + hw->hw_addr = pci_iomap(pdev, 0, 0); if (!hw->hw_addr) goto err_ioremap; @@ -1896,8 +2307,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); - netdev->mem_start = mmio_start; - netdev->mem_end = mmio_start + mmio_len; + netdev->mem_start = pci_resource_start(pdev, 0); + netdev->mem_end = pci_resource_end(pdev, 0); /* PCI config space info */ hw->vendor_id = pdev->vendor; @@ -1935,8 +2346,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, dev_info(&pdev->dev, "PHY reset is blocked due to SOL/IDER session.\n"); - /* - * features is initialized to 0 in allocation, it might have bits + /* features is initialized to 0 in allocation, it might have bits * set by igb_sw_init so we should use an or instead of an * assignment. */ @@ -1947,14 +2357,15 @@ static int __devinit igb_probe(struct pci_dev *pdev, NETIF_F_TSO6 | NETIF_F_RXHASH | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_TX; + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; + netdev->hw_features |= NETIF_F_RXALL; /* set this bit last since it cannot be part of hw_features */ - netdev->features |= NETIF_F_HW_VLAN_FILTER; + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6 | @@ -1962,6 +2373,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, NETIF_F_IPV6_CSUM | NETIF_F_SG; + netdev->priv_flags |= IFF_SUPP_NOFCS; + if (pci_using_dac) { netdev->features |= NETIF_F_HIGHDMA; netdev->vlan_features |= NETIF_F_HIGHDMA; @@ -1977,14 +2390,32 @@ static int __devinit igb_probe(struct pci_dev *pdev, adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); /* before reading the NVM, reset the controller to put the device in a - * known good starting state */ + * known good starting state + */ hw->mac.ops.reset_hw(hw); - /* make sure the NVM is good */ - if (hw->nvm.ops.validate(hw) < 0) { - dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); - err = -EIO; - goto err_eeprom; + /* make sure the NVM is good , i211/i210 parts can have special NVM + * that doesn't contain a checksum + */ + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (igb_get_flash_presence_i210(hw)) { + if (hw->nvm.ops.validate(hw) < 0) { + dev_err(&pdev->dev, + "The NVM Checksum Is Not Valid\n"); + err = -EIO; + goto err_eeprom; + } + } + break; + default: + if (hw->nvm.ops.validate(hw) < 0) { + dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); + err = -EIO; + goto err_eeprom; + } + break; } /* copy the MAC address out of the NVM */ @@ -1992,18 +2423,26 @@ static int __devinit igb_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "NVM Read Error\n"); memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); - memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); - if (!is_valid_ether_addr(netdev->perm_addr)) { + if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address\n"); err = -EIO; goto err_eeprom; } + /* get firmware version for ethtool -i */ + igb_set_fw_version(adapter); + + /* configure RXPBSIZE and TXPBSIZE */ + if (hw->mac.type == e1000_i210) { + wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT); + wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT); + } + setup_timer(&adapter->watchdog_timer, igb_watchdog, - (unsigned long) adapter); + (unsigned long) adapter); setup_timer(&adapter->phy_info_timer, igb_update_phy_info, - (unsigned long) adapter); + (unsigned long) adapter); INIT_WORK(&adapter->reset_task, igb_reset_task); INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); @@ -2018,59 +2457,82 @@ static int __devinit igb_probe(struct pci_dev *pdev, igb_validate_mdi_setting(hw); - /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM, - * enable the ACPI Magic Packet filter - */ - + /* By default, support wake on port A */ if (hw->bus.func == 0) - hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data); - else if (hw->mac.type >= e1000_82580) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* Check the NVM for wake support on non-port A ports */ + if (hw->mac.type >= e1000_82580) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + - NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, - &eeprom_data); + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); else if (hw->bus.func == 1) hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); - if (eeprom_data & eeprom_apme_mask) - adapter->eeprom_wol |= E1000_WUFC_MAG; + if (eeprom_data & IGB_EEPROM_APME) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; /* now that we have the eeprom settings, apply the special cases where * the eeprom may be wrong or the board simply won't support wake on - * lan on a particular port */ + * lan on a particular port + */ switch (pdev->device) { case E1000_DEV_ID_82575GB_QUAD_COPPER: - adapter->eeprom_wol = 0; + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; break; case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82576_FIBER: case E1000_DEV_ID_82576_SERDES: /* Wake events only supported on port A for dual fiber - * regardless of eeprom setting */ + * regardless of eeprom setting + */ if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) - adapter->eeprom_wol = 0; + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; break; case E1000_DEV_ID_82576_QUAD_COPPER: case E1000_DEV_ID_82576_QUAD_COPPER_ET2: /* if quad port adapter, disable WoL on all but port A */ if (global_quad_port_a != 0) - adapter->eeprom_wol = 0; + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; else adapter->flags |= IGB_FLAG_QUAD_PORT_A; /* Reset for multiple quad port adapters */ if (++global_quad_port_a == 4) global_quad_port_a = 0; break; + default: + /* If the device can't wake, don't set software support */ + if (!device_can_wakeup(&adapter->pdev->dev)) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; } /* initialize the wol settings based on the eeprom settings */ - adapter->wol = adapter->eeprom_wol; - device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) + adapter->wol |= E1000_WUFC_MAG; + + /* Some vendors want WoL disabled by default, but still supported */ + if ((hw->mac.type == e1000_i350) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + + device_set_wakeup_enable(&adapter->pdev->dev, + adapter->flags & IGB_FLAG_WOL_SUPPORTED); /* reset the hardware with the new settings */ igb_reset(adapter); + /* Init the I2C interface */ + err = igb_init_i2c(adapter); + if (err) { + dev_err(&pdev->dev, "failed to init i2c interface\n"); + goto err_eeprom; + } + /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. + */ igb_get_hw_control(adapter); strcpy(netdev->name, "eth%d"); @@ -2089,44 +2551,102 @@ static int __devinit igb_probe(struct pci_dev *pdev, } #endif +#ifdef CONFIG_IGB_HWMON + /* Initialize the thermal sensor on i350 devices. */ + if (hw->mac.type == e1000_i350 && hw->bus.func == 0) { + u16 ets_word; + + /* Read the NVM to determine if this i350 device supports an + * external thermal sensor. + */ + hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word); + if (ets_word != 0x0000 && ets_word != 0xFFFF) + adapter->ets = true; + else + adapter->ets = false; + if (igb_sysfs_init(adapter)) + dev_err(&pdev->dev, + "failed to allocate sysfs resources\n"); + } else { + adapter->ets = false; + } +#endif + /* Check if Media Autosense is enabled */ + adapter->ei = *ei; + if (hw->dev_spec._82575.mas_capable) + igb_init_mas(adapter); + /* do hw tstamp init after resetting */ - igb_init_hw_timer(adapter); + igb_ptp_init(adapter); dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); - /* print bus type/speed/width info */ - dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", - netdev->name, - ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : - (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : - "unknown"), - ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : - (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : - (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : - "unknown"), - netdev->dev_addr); - - ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH); + /* print bus type/speed/width info, not applicable to i354 */ + if (hw->mac.type != e1000_i354) { + dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? + "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? + "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? + "Width x1" : "unknown"), netdev->dev_addr); + } + + if ((hw->mac.type >= e1000_i210 || + igb_get_flash_presence_i210(hw))) { + ret_val = igb_read_part_string(hw, part_str, + E1000_PBANUM_LENGTH); + } else { + ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND; + } + if (ret_val) strcpy(part_str, "Unknown"); dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str); dev_info(&pdev->dev, "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", - adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" : (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", adapter->num_rx_queues, adapter->num_tx_queues); - switch (hw->mac.type) { - case e1000_i350: - igb_set_eee_i350(hw); - break; - default: - break; + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = igb_set_eee_i350(hw); + if ((!err) && + (!hw->dev_spec._82575.eee_disable)) { + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + adapter->flags |= IGB_FLAG_EEE; + } + break; + case e1000_i354: + if ((rd32(E1000_CTRL_EXT) & + E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = igb_set_eee_i354(hw); + if ((!err) && + (!hw->dev_spec._82575.eee_disable)) { + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + adapter->flags |= IGB_FLAG_EEE; + } + } + break; + default: + break; + } } - pm_runtime_put_noidle(&pdev->dev); return 0; err_register: igb_release_hw_control(adapter); + memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); err_eeprom: if (!igb_check_reset_block(hw)) igb_reset_phy(hw); @@ -2135,37 +2655,142 @@ err_eeprom: iounmap(hw->flash_address); err_sw_init: igb_clear_interrupt_scheme(adapter); - iounmap(hw->hw_addr); + pci_iounmap(pdev, hw->hw_addr); err_ioremap: free_netdev(netdev); err_alloc_etherdev: pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); err_pci_reg: err_dma: pci_disable_device(pdev); return err; } +#ifdef CONFIG_PCI_IOV +static int igb_disable_sriov(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + /* reclaim resources allocated to VFs */ + if (adapter->vf_data) { + /* disable iov and allow time for transactions to clear */ + if (pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n"); + return -EPERM; + } else { + pci_disable_sriov(pdev); + msleep(500); + } + + kfree(adapter->vf_data); + adapter->vf_data = NULL; + adapter->vfs_allocated_count = 0; + wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); + wrfl(); + msleep(100); + dev_info(&pdev->dev, "IOV Disabled\n"); + + /* Re-enable DMA Coalescing flag since IOV is turned off */ + adapter->flags |= IGB_FLAG_DMAC; + } + + return 0; +} + +static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + int old_vfs = pci_num_vf(pdev); + int err = 0; + int i; + + if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) { + err = -EPERM; + goto out; + } + if (!num_vfs) + goto out; + + if (old_vfs) { + dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n", + old_vfs, max_vfs); + adapter->vfs_allocated_count = old_vfs; + } else + adapter->vfs_allocated_count = num_vfs; + + adapter->vf_data = kcalloc(adapter->vfs_allocated_count, + sizeof(struct vf_data_storage), GFP_KERNEL); + + /* if allocation failed then we do not support SR-IOV */ + if (!adapter->vf_data) { + adapter->vfs_allocated_count = 0; + dev_err(&pdev->dev, + "Unable to allocate memory for VF Data Storage\n"); + err = -ENOMEM; + goto out; + } + + /* only call pci_enable_sriov() if no VFs are allocated already */ + if (!old_vfs) { + err = pci_enable_sriov(pdev, adapter->vfs_allocated_count); + if (err) + goto err_out; + } + dev_info(&pdev->dev, "%d VFs allocated\n", + adapter->vfs_allocated_count); + for (i = 0; i < adapter->vfs_allocated_count; i++) + igb_vf_configure(adapter, i); + + /* DMA Coalescing is not supported in IOV mode. */ + adapter->flags &= ~IGB_FLAG_DMAC; + goto out; + +err_out: + kfree(adapter->vf_data); + adapter->vf_data = NULL; + adapter->vfs_allocated_count = 0; +out: + return err; +} + +#endif +/** + * igb_remove_i2c - Cleanup I2C interface + * @adapter: pointer to adapter structure + **/ +static void igb_remove_i2c(struct igb_adapter *adapter) +{ + /* free the adapter bus structure */ + i2c_del_adapter(&adapter->i2c_adap); +} + /** - * igb_remove - Device Removal Routine - * @pdev: PCI device information struct + * igb_remove - Device Removal Routine + * @pdev: PCI device information struct * - * igb_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. The could be caused by a - * Hot-Plug event, or because the driver is going to be removed from - * memory. + * igb_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. **/ -static void __devexit igb_remove(struct pci_dev *pdev) +static void igb_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; pm_runtime_get_noresume(&pdev->dev); - - /* - * The watchdog timer may be rescheduled, so explicitly +#ifdef CONFIG_IGB_HWMON + igb_sysfs_exit(adapter); +#endif + igb_remove_i2c(adapter); + igb_ptp_stop(adapter); + /* The watchdog timer may be rescheduled, so explicitly * disable watchdog from being rescheduled. */ set_bit(__IGB_DOWN, &adapter->state); @@ -2185,7 +2810,8 @@ static void __devexit igb_remove(struct pci_dev *pdev) #endif /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. */ + * would have already happened in close and is redundant. + */ igb_release_hw_control(adapter); unregister_netdev(netdev); @@ -2193,30 +2819,14 @@ static void __devexit igb_remove(struct pci_dev *pdev) igb_clear_interrupt_scheme(adapter); #ifdef CONFIG_PCI_IOV - /* reclaim resources allocated to VFs */ - if (adapter->vf_data) { - /* disable iov and allow time for transactions to clear */ - if (!igb_check_vf_assignment(adapter)) { - pci_disable_sriov(pdev); - msleep(500); - } else { - dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); - } - - kfree(adapter->vf_data); - adapter->vf_data = NULL; - wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ); - wrfl(); - msleep(100); - dev_info(&pdev->dev, "IOV Disabled\n"); - } + igb_disable_sriov(pdev); #endif - iounmap(hw->hw_addr); + pci_iounmap(pdev, hw->hw_addr); if (hw->flash_address) iounmap(hw->flash_address); pci_release_selected_regions(pdev, - pci_select_bars(pdev, IORESOURCE_MEM)); + pci_select_bars(pdev, IORESOURCE_MEM)); kfree(adapter->shadow_vfta); free_netdev(netdev); @@ -2227,176 +2837,104 @@ static void __devexit igb_remove(struct pci_dev *pdev) } /** - * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space - * @adapter: board private structure to initialize + * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space + * @adapter: board private structure to initialize * - * This function initializes the vf specific data storage and then attempts to - * allocate the VFs. The reason for ordering it this way is because it is much - * mor expensive time wise to disable SR-IOV than it is to allocate and free - * the memory for the VFs. + * This function initializes the vf specific data storage and then attempts to + * allocate the VFs. The reason for ordering it this way is because it is much + * mor expensive time wise to disable SR-IOV than it is to allocate and free + * the memory for the VFs. **/ -static void __devinit igb_probe_vfs(struct igb_adapter * adapter) +static void igb_probe_vfs(struct igb_adapter *adapter) { #ifdef CONFIG_PCI_IOV struct pci_dev *pdev = adapter->pdev; - int old_vfs = igb_find_enabled_vfs(adapter); - int i; - - if (old_vfs) { - dev_info(&pdev->dev, "%d pre-allocated VFs found - override " - "max_vfs setting of %d\n", old_vfs, max_vfs); - adapter->vfs_allocated_count = old_vfs; - } + struct e1000_hw *hw = &adapter->hw; - if (!adapter->vfs_allocated_count) + /* Virtualization features not supported on i210 family. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) return; - adapter->vf_data = kcalloc(adapter->vfs_allocated_count, - sizeof(struct vf_data_storage), GFP_KERNEL); - /* if allocation failed then we do not support SR-IOV */ - if (!adapter->vf_data) { - adapter->vfs_allocated_count = 0; - dev_err(&pdev->dev, "Unable to allocate memory for VF " - "Data Storage\n"); - goto out; - } - - if (!old_vfs) { - if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) - goto err_out; - } - dev_info(&pdev->dev, "%d VFs allocated\n", - adapter->vfs_allocated_count); - for (i = 0; i < adapter->vfs_allocated_count; i++) - igb_vf_configure(adapter, i); + pci_sriov_set_totalvfs(pdev, 7); + igb_pci_enable_sriov(pdev, max_vfs); - /* DMA Coalescing is not supported in IOV mode. */ - adapter->flags &= ~IGB_FLAG_DMAC; - goto out; -err_out: - kfree(adapter->vf_data); - adapter->vf_data = NULL; - adapter->vfs_allocated_count = 0; -out: - return; #endif /* CONFIG_PCI_IOV */ } -/** - * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp - * @adapter: board private structure to initialize - * - * igb_init_hw_timer initializes the function pointer and values for the hw - * timer found in hardware. - **/ -static void igb_init_hw_timer(struct igb_adapter *adapter) +static void igb_init_queue_configuration(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + u32 max_rss_queues; + /* Determine the maximum number of RSS queues supported. */ switch (hw->mac.type) { + case e1000_i211: + max_rss_queues = IGB_MAX_RX_QUEUES_I211; + break; + case e1000_82575: + case e1000_i210: + max_rss_queues = IGB_MAX_RX_QUEUES_82575; + break; case e1000_i350: + /* I350 cannot do RSS and SR-IOV at the same time */ + if (!!adapter->vfs_allocated_count) { + max_rss_queues = 1; + break; + } + /* fall through */ + case e1000_82576: + if (!!adapter->vfs_allocated_count) { + max_rss_queues = 2; + break; + } + /* fall through */ case e1000_82580: - memset(&adapter->cycles, 0, sizeof(adapter->cycles)); - adapter->cycles.read = igb_read_clock; - adapter->cycles.mask = CLOCKSOURCE_MASK(64); - adapter->cycles.mult = 1; - /* - * The 82580 timesync updates the system timer every 8ns by 8ns - * and the value cannot be shifted. Instead we need to shift - * the registers to generate a 64bit timer value. As a result - * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by - * 24 in order to generate a larger value for synchronization. - */ - adapter->cycles.shift = IGB_82580_TSYNC_SHIFT; - /* disable system timer temporarily by setting bit 31 */ - wr32(E1000_TSAUXC, 0x80000000); - wrfl(); - - /* Set registers so that rollover occurs soon to test this. */ - wr32(E1000_SYSTIMR, 0x00000000); - wr32(E1000_SYSTIML, 0x80000000); - wr32(E1000_SYSTIMH, 0x000000FF); - wrfl(); + case e1000_i354: + default: + max_rss_queues = IGB_MAX_RX_QUEUES; + break; + } - /* enable system timer by clearing bit 31 */ - wr32(E1000_TSAUXC, 0x0); - wrfl(); + adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus()); - timecounter_init(&adapter->clock, - &adapter->cycles, - ktime_to_ns(ktime_get_real())); - /* - * Synchronize our NIC clock against system wall clock. NIC - * time stamp reading requires ~3us per sample, each sample - * was pretty stable even under load => only require 10 - * samples for each offset comparison. - */ - memset(&adapter->compare, 0, sizeof(adapter->compare)); - adapter->compare.source = &adapter->clock; - adapter->compare.target = ktime_get_real; - adapter->compare.num_samples = 10; - timecompare_update(&adapter->compare, 0); + /* Determine if we need to pair queues. */ + switch (hw->mac.type) { + case e1000_82575: + case e1000_i211: + /* Device supports enough interrupts without queue pairing. */ break; case e1000_82576: - /* - * Initialize hardware timer: we keep it running just in case - * that some program needs it later on. - */ - memset(&adapter->cycles, 0, sizeof(adapter->cycles)); - adapter->cycles.read = igb_read_clock; - adapter->cycles.mask = CLOCKSOURCE_MASK(64); - adapter->cycles.mult = 1; - /** - * Scale the NIC clock cycle by a large factor so that - * relatively small clock corrections can be added or - * subtracted at each clock tick. The drawbacks of a large - * factor are a) that the clock register overflows more quickly - * (not such a big deal) and b) that the increment per tick has - * to fit into 24 bits. As a result we need to use a shift of - * 19 so we can fit a value of 16 into the TIMINCA register. - */ - adapter->cycles.shift = IGB_82576_TSYNC_SHIFT; - wr32(E1000_TIMINCA, - (1 << E1000_TIMINCA_16NS_SHIFT) | - (16 << IGB_82576_TSYNC_SHIFT)); - - /* Set registers so that rollover occurs soon to test this. */ - wr32(E1000_SYSTIML, 0x00000000); - wr32(E1000_SYSTIMH, 0xFF800000); - wrfl(); - - timecounter_init(&adapter->clock, - &adapter->cycles, - ktime_to_ns(ktime_get_real())); - /* - * Synchronize our NIC clock against system wall clock. NIC - * time stamp reading requires ~3us per sample, each sample - * was pretty stable even under load => only require 10 - * samples for each offset comparison. + /* If VFs are going to be allocated with RSS queues then we + * should pair the queues in order to conserve interrupts due + * to limited supply. */ - memset(&adapter->compare, 0, sizeof(adapter->compare)); - adapter->compare.source = &adapter->clock; - adapter->compare.target = ktime_get_real; - adapter->compare.num_samples = 10; - timecompare_update(&adapter->compare, 0); - break; - case e1000_82575: - /* 82575 does not support timesync */ + if ((adapter->rss_queues > 1) && + (adapter->vfs_allocated_count > 6)) + adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + /* fall through */ + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: default: + /* If rss_queues > half of max_rss_queues, pair the queues in + * order to conserve interrupts due to limited supply. + */ + if (adapter->rss_queues > (max_rss_queues / 2)) + adapter->flags |= IGB_FLAG_QUEUE_PAIRS; break; } - } /** - * igb_sw_init - Initialize general software structures (struct igb_adapter) - * @adapter: board private structure to initialize + * igb_sw_init - Initialize general software structures (struct igb_adapter) + * @adapter: board private structure to initialize * - * igb_sw_init initializes the Adapter private data structure. - * Fields are initialized based on PCI device information and - * OS network device settings (MTU size). + * igb_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). **/ -static int __devinit igb_sw_init(struct igb_adapter *adapter) +static int igb_sw_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; @@ -2419,8 +2957,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - adapter->node = -1; - spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -2429,35 +2965,26 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) if (max_vfs > 7) { dev_warn(&pdev->dev, "Maximum of 7 VFs per PF, using max\n"); - adapter->vfs_allocated_count = 7; + max_vfs = adapter->vfs_allocated_count = 7; } else adapter->vfs_allocated_count = max_vfs; + if (adapter->vfs_allocated_count) + dev_warn(&pdev->dev, + "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n"); break; default: break; } #endif /* CONFIG_PCI_IOV */ - adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus()); - /* i350 cannot do RSS and SR-IOV at the same time */ - if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count) - adapter->rss_queues = 1; - - /* - * if rss_queues > 4 or vfs are going to be allocated with rss_queues - * then we should combine the queues into a queue pair in order to - * conserve interrupts due to limited supply - */ - if ((adapter->rss_queues > 4) || - ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6))) - adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + + igb_init_queue_configuration(adapter); /* Setup and initialize a copy of the hw vlan table array */ - adapter->shadow_vfta = kzalloc(sizeof(u32) * - E1000_VLAN_FILTER_TBL_SIZE, - GFP_ATOMIC); + adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32), + GFP_ATOMIC); /* This call may decrease the number of queues */ - if (igb_init_interrupt_scheme(adapter)) { + if (igb_init_interrupt_scheme(adapter, true)) { dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); return -ENOMEM; } @@ -2467,7 +2994,7 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) /* Explicitly disable IRQ since the NIC can be in any state. */ igb_irq_disable(adapter); - if (hw->mac.type == e1000_i350) + if (hw->mac.type >= e1000_i350) adapter->flags &= ~IGB_FLAG_DMAC; set_bit(__IGB_DOWN, &adapter->state); @@ -2475,16 +3002,16 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) } /** - * igb_open - Called when a network interface is made active - * @netdev: network interface device structure + * igb_open - Called when a network interface is made active + * @netdev: network interface device structure * - * Returns 0 on success, negative value on failure + * Returns 0 on success, negative value on failure * - * The open entry point is called when a network interface is made - * active by the system (IFF_UP). At this point all resources needed - * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, - * and the stack is notified that the interface is ready. + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. **/ static int __igb_open(struct net_device *netdev, bool resuming) { @@ -2520,13 +3047,25 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* before we allocate an interrupt, we must be ready to handle it. * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt * as soon as we call pci_request_irq, so we have to setup our - * clean_rx handler before we do so. */ + * clean_rx handler before we do so. + */ igb_configure(adapter); err = igb_request_irq(adapter); if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(adapter->netdev, + adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(adapter->netdev, + adapter->num_rx_queues); + if (err) + goto err_set_queues; + /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); @@ -2541,6 +3080,7 @@ static int __igb_open(struct net_device *netdev, bool resuming) /* notify VFs that reset has been completed */ if (adapter->vfs_allocated_count) { u32 reg_data = rd32(E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; wr32(E1000_CTRL_EXT, reg_data); } @@ -2556,6 +3096,8 @@ static int __igb_open(struct net_device *netdev, bool resuming) return 0; +err_set_queues: + igb_free_irq(adapter); err_req_irq: igb_release_hw_control(adapter); igb_power_down_link(adapter); @@ -2576,15 +3118,15 @@ static int igb_open(struct net_device *netdev) } /** - * igb_close - Disables a network interface - * @netdev: network interface device structure + * igb_close - Disables a network interface + * @netdev: network interface device structure * - * Returns 0, this is not allowed to fail + * Returns 0, this is not allowed to fail * - * The close entry point is called when an interface is de-activated - * by the OS. The hardware is still under the driver's control, but - * needs to be disabled. A global MAC reset is issued to stop the - * hardware, and all transmit and receive resources are freed. + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. **/ static int __igb_close(struct net_device *netdev, bool suspending) { @@ -2613,21 +3155,19 @@ static int igb_close(struct net_device *netdev) } /** - * igb_setup_tx_resources - allocate Tx resources (Descriptors) - * @tx_ring: tx descriptor ring (for a specific queue) to setup + * igb_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ int igb_setup_tx_resources(struct igb_ring *tx_ring) { struct device *dev = tx_ring->dev; - int orig_node = dev_to_node(dev); int size; size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); - if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + + tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2635,18 +3175,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - set_dev_node(dev, tx_ring->numa_node); - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!tx_ring->desc) - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); - + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -2657,17 +3187,17 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) err: vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); return -ENOMEM; } /** - * igb_setup_all_tx_resources - wrapper to allocate Tx resources - * (Descriptors) for all queues - * @adapter: board private structure + * igb_setup_all_tx_resources - wrapper to allocate Tx resources + * (Descriptors) for all queues + * @adapter: board private structure * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) { @@ -2689,8 +3219,8 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) } /** - * igb_setup_tctl - configure the transmit control registers - * @adapter: Board private structure + * igb_setup_tctl - configure the transmit control registers + * @adapter: Board private structure **/ void igb_setup_tctl(struct igb_adapter *adapter) { @@ -2715,14 +3245,14 @@ void igb_setup_tctl(struct igb_adapter *adapter) } /** - * igb_configure_tx_ring - Configure transmit ring after Reset - * @adapter: board private structure - * @ring: tx ring to configure + * igb_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure * - * Configure a transmit ring after a reset. + * Configure a transmit ring after a reset. **/ void igb_configure_tx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) + struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; u32 txdctl = 0; @@ -2735,9 +3265,9 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, mdelay(10); wr32(E1000_TDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_tx_desc)); + ring->count * sizeof(union e1000_adv_tx_desc)); wr32(E1000_TDBAL(reg_idx), - tdba & 0x00000000ffffffffULL); + tdba & 0x00000000ffffffffULL); wr32(E1000_TDBAH(reg_idx), tdba >> 32); ring->tail = hw->hw_addr + E1000_TDT(reg_idx); @@ -2753,10 +3283,10 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, } /** - * igb_configure_tx - Configure transmit Unit after Reset - * @adapter: board private structure + * igb_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure * - * Configure the Tx unit of the MAC after a reset. + * Configure the Tx unit of the MAC after a reset. **/ static void igb_configure_tx(struct igb_adapter *adapter) { @@ -2767,45 +3297,32 @@ static void igb_configure_tx(struct igb_adapter *adapter) } /** - * igb_setup_rx_resources - allocate Rx resources (Descriptors) - * @rx_ring: rx descriptor ring (for a specific queue) to setup + * igb_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: Rx descriptor ring (for a specific queue) to setup * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct device *dev = rx_ring->dev; - int orig_node = dev_to_node(dev); - int size, desc_len; + int size; size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + + rx_ring->rx_buffer_info = vzalloc(size); if (!rx_ring->rx_buffer_info) goto err; - desc_len = sizeof(union e1000_adv_rx_desc); - /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - set_dev_node(dev, rx_ring->numa_node); - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!rx_ring->desc) - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; @@ -2814,17 +3331,16 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) err: vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the receive descriptor" - " ring\n"); + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); return -ENOMEM; } /** - * igb_setup_all_rx_resources - wrapper to allocate Rx resources - * (Descriptors) for all queues - * @adapter: board private structure + * igb_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure * - * Return 0 on success, negative on failure + * Return 0 on success, negative on failure **/ static int igb_setup_all_rx_resources(struct igb_adapter *adapter) { @@ -2846,68 +3362,44 @@ static int igb_setup_all_rx_resources(struct igb_adapter *adapter) } /** - * igb_setup_mrqc - configure the multiple receive queue control registers - * @adapter: Board private structure + * igb_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure **/ static void igb_setup_mrqc(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0, shift2 = 0; - union e1000_reta { - u32 dword; - u8 bytes[4]; - } reta; - static const u8 rsshash[40] = { - 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, - 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, - 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, - 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; + u32 j, num_rx_queues; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; /* Fill out hash function seeds */ - for (j = 0; j < 10; j++) { - u32 rsskey = rsshash[(j * 4)]; - rsskey |= rsshash[(j * 4) + 1] << 8; - rsskey |= rsshash[(j * 4) + 2] << 16; - rsskey |= rsshash[(j * 4) + 3] << 24; - array_wr32(E1000_RSSRK(0), j, rsskey); - } + for (j = 0; j < 10; j++) + wr32(E1000_RSSRK(j), rsskey[j]); num_rx_queues = adapter->rss_queues; - if (adapter->vfs_allocated_count) { - /* 82575 and 82576 supports 2 RSS queues for VMDq */ - switch (hw->mac.type) { - case e1000_i350: - case e1000_82580: - num_rx_queues = 1; - shift = 0; - break; - case e1000_82576: - shift = 3; + switch (hw->mac.type) { + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count) num_rx_queues = 2; - break; - case e1000_82575: - shift = 2; - shift2 = 6; - default: - break; - } - } else { - if (hw->mac.type == e1000_82575) - shift = 6; + break; + default: + break; } - for (j = 0; j < (32 * 4); j++) { - reta.bytes[j & 3] = (j % num_rx_queues) << shift; - if (shift2) - reta.bytes[j & 3] |= num_rx_queues << shift2; - if ((j & 3) == 3) - wr32(E1000_RETA(j >> 2), reta.dword); + if (adapter->rss_indir_tbl_init != num_rx_queues) { + for (j = 0; j < IGB_RETA_SIZE; j++) + adapter->rss_indir_tbl[j] = + (j * num_rx_queues) / IGB_RETA_SIZE; + adapter->rss_indir_tbl_init = num_rx_queues; } + igb_write_rss_indir_tbl(adapter); - /* - * Disable raw packet checksumming so that RSS hash is placed in + /* Disable raw packet checksumming so that RSS hash is placed in * descriptor on writeback. No need to enable TCP/UDP/IP checksum * offloads as they are enabled by default */ @@ -2921,13 +3413,29 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) /* Don't need to set TUOFL or IPOFL, they default to 1 */ wr32(E1000_RXCSUM, rxcsum); + /* Generate RSS hash based on packet types, TCP/UDP + * port numbers and/or IPv4/v6 src and dst addresses + */ + mrqc = E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; + + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; + /* If VMDq is enabled then we set the appropriate mode for that, else * we default to RSS so that an RSS hash is calculated per packet even - * if we are only using one queue */ + * if we are only using one queue + */ if (adapter->vfs_allocated_count) { if (hw->mac.type > e1000_82575) { /* Set the default pool for the PF's first queue */ u32 vtctl = rd32(E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | E1000_VT_CTL_DISABLE_DEF_POOL); vtctl |= adapter->vfs_allocated_count << @@ -2935,31 +3443,21 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) wr32(E1000_VT_CTL, vtctl); } if (adapter->rss_queues > 1) - mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; else - mrqc = E1000_MRQC_ENABLE_VMDQ; + mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { - mrqc = E1000_MRQC_ENABLE_RSS_4Q; + if (hw->mac.type != e1000_i211) + mrqc |= E1000_MRQC_ENABLE_RSS_4Q; } igb_vmm_control(adapter); - /* - * Generate RSS hash based on TCP port numbers and/or - * IPv4/v6 src and dst addresses since UDP cannot be - * hashed reliably due to IP fragmentation - */ - mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | - E1000_MRQC_RSS_FIELD_IPV4_TCP | - E1000_MRQC_RSS_FIELD_IPV6 | - E1000_MRQC_RSS_FIELD_IPV6_TCP | - E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; - wr32(E1000_MRQC, mrqc); } /** - * igb_setup_rctl - configure the receive control registers - * @adapter: Board private structure + * igb_setup_rctl - configure the receive control registers + * @adapter: Board private structure **/ void igb_setup_rctl(struct igb_adapter *adapter) { @@ -2974,8 +3472,7 @@ void igb_setup_rctl(struct igb_adapter *adapter) rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - /* - * enable stripping of CRC. It's unlikely this will break BMC + /* enable stripping of CRC. It's unlikely this will break BMC * redirection as it did with e1000. Newer features require * that the HW strips the CRC. */ @@ -2999,17 +3496,35 @@ void igb_setup_rctl(struct igb_adapter *adapter) wr32(E1000_QDE, ALL_QUEUES); } + /* This is useful for sniffing bad packets. */ + if (adapter->netdev->features & NETIF_F_RXALL) { + /* UPE and MPE will be handled by normal PROMISC logic + * in e1000e_set_rx_mode + */ + rctl |= (E1000_RCTL_SBP | /* Receive bad packets */ + E1000_RCTL_BAM | /* RX All Bcast Pkts */ + E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */ + + rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */ + E1000_RCTL_DPF | /* Allow filtered pause */ + E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */ + /* Do not mess with E1000_CTRL_VME, it affects transmit as well, + * and that breaks VLANs. + */ + } + wr32(E1000_RCTL, rctl); } static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, - int vfn) + int vfn) { struct e1000_hw *hw = &adapter->hw; u32 vmolr; /* if it isn't the PF check to see if VFs are enabled and - * increase the size to support vlan tags */ + * increase the size to support vlan tags + */ if (vfn < adapter->vfs_allocated_count && adapter->vf_data[vfn].vlans_enabled) size += VLAN_TAG_SIZE; @@ -3023,10 +3538,10 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, } /** - * igb_rlpml_set - set maximum receive packet size - * @adapter: board private structure + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure * - * Configure maximum receivable packet size. + * Configure maximum receivable packet size. **/ static void igb_rlpml_set(struct igb_adapter *adapter) { @@ -3036,8 +3551,7 @@ static void igb_rlpml_set(struct igb_adapter *adapter) if (pf_id) { igb_set_vf_rlpml(adapter, max_frame_size, pf_id); - /* - * If we're in VMDQ or SR-IOV mode, then set global RLPML + /* If we're in VMDQ or SR-IOV mode, then set global RLPML * to our max jumbo frame size, in case we need to enable * jumbo frames on one of the rings later. * This will not pass over-length frames into the default @@ -3055,17 +3569,23 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, struct e1000_hw *hw = &adapter->hw; u32 vmolr; - /* - * This register exists only on 82576 and newer so if we are older then + /* This register exists only on 82576 and newer so if we are older then * we should exit and do nothing */ if (hw->mac.type < e1000_82576) return; vmolr = rd32(E1000_VMOLR(vfn)); - vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ + vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */ + if (hw->mac.type == e1000_i350) { + u32 dvmolr; + + dvmolr = rd32(E1000_DVMOLR(vfn)); + dvmolr |= E1000_DVMOLR_STRVLAN; + wr32(E1000_DVMOLR(vfn), dvmolr); + } if (aupe) - vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ + vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ else vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ @@ -3074,25 +3594,24 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter, if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ - /* - * for VMDq only allow the VFs and pool 0 to accept broadcast and + /* for VMDq only allow the VFs and pool 0 to accept broadcast and * multicast packets */ if (vfn <= adapter->vfs_allocated_count) - vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ + vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ wr32(E1000_VMOLR(vfn), vmolr); } /** - * igb_configure_rx_ring - Configure a receive ring after Reset - * @adapter: board private structure - * @ring: receive ring to be configured + * igb_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured * - * Configure the Rx unit of the MAC after a reset. + * Configure the Rx unit of the MAC after a reset. **/ void igb_configure_rx_ring(struct igb_adapter *adapter, - struct igb_ring *ring) + struct igb_ring *ring) { struct e1000_hw *hw = &adapter->hw; u64 rdba = ring->dma; @@ -3107,7 +3626,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, rdba & 0x00000000ffffffffULL); wr32(E1000_RDBAH(reg_idx), rdba >> 32); wr32(E1000_RDLEN(reg_idx), - ring->count * sizeof(union e1000_adv_rx_desc)); + ring->count * sizeof(union e1000_adv_rx_desc)); /* initialize head and tail */ ring->tail = hw->hw_addr + E1000_RDT(reg_idx); @@ -3116,12 +3635,8 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, /* set descriptor configuration */ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; -#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384 - srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT; -#else - srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; -#endif - srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; + srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; if (hw->mac.type >= e1000_82580) srrctl |= E1000_SRRCTL_TIMESTAMP; /* Only set Drop Enable if we are supporting multiple queues */ @@ -3143,10 +3658,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, } /** - * igb_configure_rx - Configure receive Unit after Reset - * @adapter: board private structure + * igb_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure * - * Configure the Rx unit of the MAC after a reset. + * Configure the Rx unit of the MAC after a reset. **/ static void igb_configure_rx(struct igb_adapter *adapter) { @@ -3157,19 +3672,20 @@ static void igb_configure_rx(struct igb_adapter *adapter) /* set the correct pool for the PF default MAC address in entry 0 */ igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); /* Setup the HW Rx Head and Tail Descriptor Pointers and - * the Base and Length of the Rx Descriptor Ring */ + * the Base and Length of the Rx Descriptor Ring + */ for (i = 0; i < adapter->num_rx_queues; i++) igb_configure_rx_ring(adapter, adapter->rx_ring[i]); } /** - * igb_free_tx_resources - Free Tx Resources per Queue - * @tx_ring: Tx descriptor ring for a specific queue + * igb_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue * - * Free all transmit software resources + * Free all transmit software resources **/ void igb_free_tx_resources(struct igb_ring *tx_ring) { @@ -3189,10 +3705,10 @@ void igb_free_tx_resources(struct igb_ring *tx_ring) } /** - * igb_free_all_tx_resources - Free Tx Resources for All Queues - * @adapter: board private structure + * igb_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure * - * Free all transmit software resources + * Free all transmit software resources **/ static void igb_free_all_tx_resources(struct igb_adapter *adapter) { @@ -3207,26 +3723,26 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring, { if (tx_buffer->skb) { dev_kfree_skb_any(tx_buffer->skb); - if (tx_buffer->dma) + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } else if (tx_buffer->dma) { + } else if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; - tx_buffer->dma = 0; + dma_unmap_len_set(tx_buffer, len, 0); /* buffer_info must be completely set up in the transmit path */ } /** - * igb_clean_tx_ring - Free Tx Buffers - * @tx_ring: ring to be cleaned + * igb_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned **/ static void igb_clean_tx_ring(struct igb_ring *tx_ring) { @@ -3242,6 +3758,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) buffer_info = &tx_ring->tx_buffer_info[i]; igb_unmap_and_free_tx_resource(tx_ring, buffer_info); } + netdev_tx_reset_queue(txring_txq(tx_ring)); size = sizeof(struct igb_tx_buffer) * tx_ring->count; @@ -3255,8 +3772,8 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) } /** - * igb_clean_all_tx_rings - Free Tx Buffers for all queues - * @adapter: board private structure + * igb_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure **/ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) { @@ -3267,10 +3784,10 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) } /** - * igb_free_rx_resources - Free Rx Resources - * @rx_ring: ring to clean the resources from + * igb_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from * - * Free all receive software resources + * Free all receive software resources **/ void igb_free_rx_resources(struct igb_ring *rx_ring) { @@ -3290,10 +3807,10 @@ void igb_free_rx_resources(struct igb_ring *rx_ring) } /** - * igb_free_all_rx_resources - Free Rx Resources for All Queues - * @adapter: board private structure + * igb_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure * - * Free all receive software resources + * Free all receive software resources **/ static void igb_free_all_rx_resources(struct igb_adapter *adapter) { @@ -3304,44 +3821,35 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) } /** - * igb_clean_rx_ring - Free Rx Buffers per Queue - * @rx_ring: ring to free buffers from + * igb_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from **/ static void igb_clean_rx_ring(struct igb_ring *rx_ring) { unsigned long size; u16 i; + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + if (!rx_ring->rx_buffer_info) return; /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; - if (buffer_info->dma) { - dma_unmap_single(rx_ring->dev, - buffer_info->dma, - IGB_RX_HDR_LEN, - DMA_FROM_DEVICE); - buffer_info->dma = 0; - } - if (buffer_info->skb) { - dev_kfree_skb(buffer_info->skb); - buffer_info->skb = NULL; - } - if (buffer_info->page_dma) { - dma_unmap_page(rx_ring->dev, - buffer_info->page_dma, - PAGE_SIZE / 2, - DMA_FROM_DEVICE); - buffer_info->page_dma = 0; - } - if (buffer_info->page) { - put_page(buffer_info->page); - buffer_info->page = NULL; - buffer_info->page_offset = 0; - } + if (!buffer_info->page) + continue; + + dma_unmap_page(rx_ring->dev, + buffer_info->dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(buffer_info->page); + + buffer_info->page = NULL; } size = sizeof(struct igb_rx_buffer) * rx_ring->count; @@ -3350,13 +3858,14 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); + rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } /** - * igb_clean_all_rx_rings - Free Rx Buffers for all queues - * @adapter: board private structure + * igb_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure **/ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) { @@ -3367,11 +3876,11 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) } /** - * igb_set_mac - Change the Ethernet Address of the NIC - * @netdev: network interface device structure - * @p: pointer to an address structure + * igb_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ static int igb_set_mac(struct net_device *netdev, void *p) { @@ -3387,19 +3896,19 @@ static int igb_set_mac(struct net_device *netdev, void *p) /* set the correct pool for the new PF MAC address in entry 0 */ igb_rar_set_qsel(adapter, hw->mac.addr, 0, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); return 0; } /** - * igb_write_mc_addr_list - write multicast addresses to MTA - * @netdev: network interface device structure + * igb_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure * - * Writes multicast address list to the MTA hash table. - * Returns: -ENOMEM on failure - * 0 on no addresses written - * X on writing X addresses to MTA + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA **/ static int igb_write_mc_addr_list(struct net_device *netdev) { @@ -3432,13 +3941,13 @@ static int igb_write_mc_addr_list(struct net_device *netdev) } /** - * igb_write_uc_addr_list - write unicast addresses to RAR table - * @netdev: network interface device structure + * igb_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure * - * Writes unicast address list to the RAR table. - * Returns: -ENOMEM on failure/insufficient address space - * 0 on no addresses written - * X on writing X addresses to the RAR table + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table **/ static int igb_write_uc_addr_list(struct net_device *netdev) { @@ -3459,8 +3968,8 @@ static int igb_write_uc_addr_list(struct net_device *netdev) if (!rar_entries) break; igb_rar_set_qsel(adapter, ha->addr, - rar_entries--, - vfn); + rar_entries--, + vfn); count++; } } @@ -3475,13 +3984,13 @@ static int igb_write_uc_addr_list(struct net_device *netdev) } /** - * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set - * @netdev: network interface device structure + * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure * - * The set_rx_mode entry point is called whenever the unicast or multicast - * address lists or the network interface flags are updated. This routine is - * responsible for configuring the hardware for proper unicast, multicast, - * promiscuous mode, and all-multi behavior. + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. **/ static void igb_set_rx_mode(struct net_device *netdev) { @@ -3498,6 +4007,9 @@ static void igb_set_rx_mode(struct net_device *netdev) rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); if (netdev->flags & IFF_PROMISC) { + /* retain VLAN HW filtering if in VT mode */ + if (adapter->vfs_allocated_count) + rctl |= E1000_RCTL_VFE; rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); } else { @@ -3505,8 +4017,7 @@ static void igb_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_MPE; vmolr |= E1000_VMOLR_MPME; } else { - /* - * Write addresses to the MTA, if the attempt fails + /* Write addresses to the MTA, if the attempt fails * then we should just turn on promiscuous mode so * that we can at least receive multicast traffic */ @@ -3518,8 +4029,7 @@ static void igb_set_rx_mode(struct net_device *netdev) vmolr |= E1000_VMOLR_ROMPE; } } - /* - * Write addresses to available RAR registers, if there is not + /* Write addresses to available RAR registers, if there is not * sufficient space to store all the addresses then enable * unicast promiscuous mode */ @@ -3532,17 +4042,16 @@ static void igb_set_rx_mode(struct net_device *netdev) } wr32(E1000_RCTL, rctl); - /* - * In order to support SR-IOV and eventually VMDq it is necessary to set + /* In order to support SR-IOV and eventually VMDq it is necessary to set * the VMOLR to enable the appropriate modes. Without this workaround * we will have issues with VLAN tag stripping not being done for frames * that are only arriving because we are the default pool */ - if (hw->mac.type < e1000_82576) + if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350)) return; vmolr |= rd32(E1000_VMOLR(vfn)) & - ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); wr32(E1000_VMOLR(vfn), vmolr); igb_restore_vf_multicasts(adapter); } @@ -3555,7 +4064,8 @@ static void igb_check_wvbr(struct igb_adapter *adapter) switch (hw->mac.type) { case e1000_82576: case e1000_i350: - if (!(wvbr = rd32(E1000_WVBR))) + wvbr = rd32(E1000_WVBR); + if (!wvbr) return; break; default: @@ -3574,7 +4084,7 @@ static void igb_spoof_check(struct igb_adapter *adapter) if (!adapter->wvbr) return; - for(j = 0; j < adapter->vfs_allocated_count; j++) { + for (j = 0; j < adapter->vfs_allocated_count; j++) { if (adapter->wvbr & (1 << j) || adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { dev_warn(&adapter->pdev->dev, @@ -3587,7 +4097,8 @@ static void igb_spoof_check(struct igb_adapter *adapter) } /* Need to wait a few seconds after link up to get diagnostic information from - * the phy */ + * the phy + */ static void igb_update_phy_info(unsigned long data) { struct igb_adapter *adapter = (struct igb_adapter *) data; @@ -3595,14 +4106,13 @@ static void igb_update_phy_info(unsigned long data) } /** - * igb_has_link - check shared code for link and determine up/down - * @adapter: pointer to driver private info + * igb_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info **/ bool igb_has_link(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; bool link_active = false; - s32 ret_val = 0; /* get_link_status is set on LSC (link status) interrupt or * rx sequence error interrupt. get_link_status will stay @@ -3611,22 +4121,28 @@ bool igb_has_link(struct igb_adapter *adapter) */ switch (hw->phy.media_type) { case e1000_media_type_copper: - if (hw->mac.get_link_status) { - ret_val = hw->mac.ops.check_for_link(hw); - link_active = !hw->mac.get_link_status; - } else { - link_active = true; - } - break; + if (!hw->mac.get_link_status) + return true; case e1000_media_type_internal_serdes: - ret_val = hw->mac.ops.check_for_link(hw); - link_active = hw->mac.serdes_has_link; + hw->mac.ops.check_for_link(hw); + link_active = !hw->mac.get_link_status; break; default: case e1000_media_type_unknown: break; } + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) { + if (!netif_carrier_ok(adapter->netdev)) { + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { + adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; + adapter->link_check_timeout = jiffies; + } + } + return link_active; } @@ -3635,23 +4151,22 @@ static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event) bool ret = false; u32 ctrl_ext, thstat; - /* check for thermal sensor event on i350, copper only */ + /* check for thermal sensor event on i350 copper only */ if (hw->mac.type == e1000_i350) { thstat = rd32(E1000_THSTAT); ctrl_ext = rd32(E1000_CTRL_EXT); if ((hw->phy.media_type == e1000_media_type_copper) && - !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) { + !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) ret = !!(thstat & event); - } } return ret; } /** - * igb_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * igb_watchdog - Timer Call-back + * @data: pointer to adapter cast into an unsigned long **/ static void igb_watchdog(unsigned long data) { @@ -3663,28 +4178,53 @@ static void igb_watchdog(unsigned long data) static void igb_watchdog_task(struct work_struct *work) { struct igb_adapter *adapter = container_of(work, - struct igb_adapter, - watchdog_task); + struct igb_adapter, + watchdog_task); struct e1000_hw *hw = &adapter->hw; + struct e1000_phy_info *phy = &hw->phy; struct net_device *netdev = adapter->netdev; u32 link; int i; + u32 connsw; link = igb_has_link(adapter); + + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { + if (time_after(jiffies, (adapter->link_check_timeout + HZ))) + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + else + link = false; + } + + /* Force link down if we have fiber to swap to */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (hw->phy.media_type == e1000_media_type_copper) { + connsw = rd32(E1000_CONNSW); + if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) + link = 0; + } + } if (link) { + /* Perform a reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + igb_reset(adapter); + } /* Cancel scheduled suspend requests. */ pm_runtime_resume(netdev->dev.parent); if (!netif_carrier_ok(netdev)) { u32 ctrl; + hw->mac.ops.get_speed_and_duplex(hw, - &adapter->link_speed, - &adapter->link_duplex); + &adapter->link_speed, + &adapter->link_duplex); ctrl = rd32(E1000_CTRL); /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s " - "Duplex, Flow Control: %s\n", + netdev_info(netdev, + "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", netdev->name, adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? @@ -3694,13 +4234,24 @@ static void igb_watchdog_task(struct work_struct *work) (ctrl & E1000_CTRL_RFCE) ? "RX" : (ctrl & E1000_CTRL_TFCE) ? "TX" : "None"); + /* disable EEE if enabled */ + if ((adapter->flags & IGB_FLAG_EEE) && + (adapter->link_duplex == HALF_DUPLEX)) { + dev_info(&adapter->pdev->dev, + "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n"); + adapter->hw.dev_spec._82575.eee_disable = true; + adapter->flags &= ~IGB_FLAG_EEE; + } + + /* check if SmartSpeed worked */ + igb_check_downshift(hw); + if (phy->speed_downgraded) + netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n"); + /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, - E1000_THSTAT_LINK_THROTTLE)) { - netdev_info(netdev, "The network adapter link " - "speed was downshifted because it " - "overheated\n"); - } + E1000_THSTAT_LINK_THROTTLE)) + netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n"); /* adjust timeout factor according to speed/duplex */ adapter->tx_timeout_factor = 1; @@ -3731,12 +4282,11 @@ static void igb_watchdog_task(struct work_struct *work) /* check for thermal sensor event */ if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) { - netdev_err(netdev, "The network adapter was " - "stopped because it overheated\n"); + netdev_err(netdev, "The network adapter was stopped because it overheated\n"); } /* Links status message must follow this format */ - printk(KERN_INFO "igb: %s NIC Link is Down\n", + netdev_info(netdev, "igb: %s NIC Link is Down\n", netdev->name); netif_carrier_off(netdev); @@ -3747,8 +4297,27 @@ static void igb_watchdog_task(struct work_struct *work) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); + /* link is down, time to check for alternate media */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } pm_schedule_suspend(netdev->dev.parent, MSEC_PER_SEC * 5); + + /* also check for alternate media here */ + } else if (!netif_carrier_ok(netdev) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } } } @@ -3762,7 +4331,8 @@ static void igb_watchdog_task(struct work_struct *work) /* We've lost link, so the controller stops DMA, * but we've got queued Tx work that's never going * to get done, so reset controller to flush Tx. - * (Do the reset outside of interrupt context). */ + * (Do the reset outside of interrupt context). + */ if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); @@ -3775,9 +4345,10 @@ static void igb_watchdog_task(struct work_struct *work) set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); } - /* Cause software interrupt to ensure rx ring is cleaned */ - if (adapter->msix_entries) { + /* Cause software interrupt to ensure Rx ring is cleaned */ + if (adapter->flags & IGB_FLAG_HAS_MSIX) { u32 eics = 0; + for (i = 0; i < adapter->num_q_vectors; i++) eics |= adapter->q_vector[i]->eims_value; wr32(E1000_EICS, eics); @@ -3786,11 +4357,17 @@ static void igb_watchdog_task(struct work_struct *work) } igb_spoof_check(adapter); + igb_ptp_rx_hang(adapter); /* Reset the timer */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + HZ)); + else + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 2 * HZ)); + } } enum latency_range { @@ -3801,20 +4378,19 @@ enum latency_range { }; /** - * igb_update_ring_itr - update the dynamic ITR value based on packet size + * igb_update_ring_itr - update the dynamic ITR value based on packet size + * @q_vector: pointer to q_vector * - * Stores a new ITR value based on strictly on packet size. This - * algorithm is less sophisticated than that used in igb_update_itr, - * due to the difficulty of synchronizing statistics across multiple - * receive rings. The divisors and thresholds used by this function - * were determined based on theoretical maximum wire speed and testing - * data, in order to minimize response time while increasing bulk - * throughput. - * This functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: This function is called only when operating in a multiqueue - * receive environment. - * @q_vector: pointer to q_vector + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igb_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * This functionality is controlled by ethtool's coalescing settings. + * NOTE: This function is called only when operating in a multiqueue + * receive environment. **/ static void igb_update_ring_itr(struct igb_q_vector *q_vector) { @@ -3875,20 +4451,20 @@ clear_counts: } /** - * igb_update_itr - update the dynamic ITR value based on statistics - * Stores a new ITR value based on packets and byte - * counts during the last interrupt. The advantage of per interrupt - * computation is faster updates and more accurate ITR for the current - * traffic pattern. Constants in this function were computed - * based on theoretical maximum wire speed and thresholds were set based - * on testing data as well as attempting to minimize response time - * while increasing bulk throughput. - * this functionality is controlled by the InterruptThrottleRate module - * parameter (see igb_param.c) - * NOTE: These calculations are only valid when operating in a single- - * queue environment. - * @q_vector: pointer to q_vector - * @ring_container: ring info to update the itr for + * igb_update_itr - update the dynamic ITR value based on statistics + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * This functionality is controlled by ethtool's coalescing settings. + * NOTE: These calculations are only valid when operating in a single- + * queue environment. **/ static void igb_update_itr(struct igb_q_vector *q_vector, struct igb_ring_container *ring_container) @@ -3912,13 +4488,12 @@ static void igb_update_itr(struct igb_q_vector *q_vector, case low_latency: /* 50 usec aka 20000 ints/s */ if (bytes > 10000) { /* this if handles the TSO accounting */ - if (bytes/packets > 8000) { + if (bytes/packets > 8000) itrval = bulk_latency; - } else if ((packets < 10) || ((bytes/packets) > 1200)) { + else if ((packets < 10) || ((bytes/packets) > 1200)) itrval = bulk_latency; - } else if ((packets > 35)) { + else if ((packets > 35)) itrval = lowest_latency; - } } else if (bytes/packets > 2000) { itrval = bulk_latency; } else if (packets <= 2 && bytes < 512) { @@ -3986,12 +4561,12 @@ set_itr_now: if (new_itr != q_vector->itr_val) { /* this attempts to bias the interrupt rate towards Bulk * by adding intermediate steps when interrupt rate is - * increasing */ + * increasing + */ new_itr = new_itr > q_vector->itr_val ? - max((new_itr * q_vector->itr_val) / - (new_itr + (q_vector->itr_val >> 2)), - new_itr) : - new_itr; + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : new_itr; /* Don't write the value here; it resets the adapter's * internal timer, and causes us to delay far longer than * we should between interrupts. Instead, we write the ITR @@ -4034,20 +4609,22 @@ static int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb = first->skb; u32 vlan_macip_lens, type_tucmd; u32 mss_l4len_idx, l4len; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; if (!skb_is_gso(skb)) return 0; - if (skb_header_cloned(skb)) { - int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } + err = skb_cow_head(skb, 0); + if (err < 0) + return err; /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; - if (first->protocol == __constant_htons(ETH_P_IP)) { + if (first->protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; @@ -4102,21 +4679,22 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) return; } else { u8 l4_hdr = 0; + switch (first->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): vlan_macip_lens |= skb_network_header_len(skb); type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; l4_hdr = ip_hdr(skb)->protocol; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): vlan_macip_lens |= skb_network_header_len(skb); l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but proto=%x!\n", - first->protocol); + "partial checksum but proto=%x!\n", + first->protocol); } break; } @@ -4139,8 +4717,8 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) default: if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, - "partial checksum but l4 proto=%x!\n", - l4_hdr); + "partial checksum but l4 proto=%x!\n", + l4_hdr); } break; } @@ -4155,24 +4733,32 @@ static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); } -static __le32 igb_tx_cmd_type(u32 tx_flags) +#define IGB_SET_FLAG(_input, _flag, _result) \ + ((_flag <= _result) ? \ + ((u32)(_input & _flag) * (_result / _flag)) : \ + ((u32)(_input & _flag) / (_flag / _result))) + +static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ - __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA | - E1000_ADVTXD_DCMD_IFCS | - E1000_ADVTXD_DCMD_DEXT); + u32 cmd_type = E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS; /* set HW vlan bit if vlan is present */ - if (tx_flags & IGB_TX_FLAGS_VLAN) - cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, + (E1000_ADVTXD_DCMD_VLE)); + + /* set segmentation bits for TSO */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO, + (E1000_ADVTXD_DCMD_TSE)); /* set timestamp bit if present */ - if (tx_flags & IGB_TX_FLAGS_TSTAMP) - cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP, + (E1000_ADVTXD_MAC_TSTAMP)); - /* set segmentation bits for TSO */ - if (tx_flags & IGB_TX_FLAGS_TSO) - cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE); + /* insert frame checksum */ + cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS); return cmd_type; } @@ -4183,64 +4769,61 @@ static void igb_tx_olinfo_status(struct igb_ring *tx_ring, { u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; - /* 82575 requires a unique index per ring if any offload is enabled */ - if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) && - test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) olinfo_status |= tx_ring->reg_idx << 4; /* insert L4 checksum */ - if (tx_flags & IGB_TX_FLAGS_CSUM) { - olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_CSUM, + (E1000_TXD_POPTS_TXSM << 8)); - /* insert IPv4 checksum */ - if (tx_flags & IGB_TX_FLAGS_IPV4) - olinfo_status |= E1000_TXD_POPTS_IXSM << 8; - } + /* insert IPv4 checksum */ + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_IPV4, + (E1000_TXD_POPTS_IXSM << 8)); tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); } -/* - * The largest size we can write to the descriptor is 65535. In order to - * maintain a power of two alignment we have to limit ourselves to 32K. - */ -#define IGB_MAX_TXD_PWR 15 -#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR) - static void igb_tx_map(struct igb_ring *tx_ring, struct igb_tx_buffer *first, const u8 hdr_len) { struct sk_buff *skb = first->skb; - struct igb_tx_buffer *tx_buffer_info; + struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; + struct skb_frag_struct *frag; dma_addr_t dma; - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; - unsigned int data_len = skb->data_len; - unsigned int size = skb_headlen(skb); - unsigned int paylen = skb->len - hdr_len; - __le32 cmd_type; + unsigned int data_len, size; u32 tx_flags = first->tx_flags; + u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); u16 i = tx_ring->next_to_use; tx_desc = IGB_TX_DESC(tx_ring, i); - igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen); - cmd_type = igb_tx_cmd_type(tx_flags); + igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; - /* record length, and DMA address */ - first->length = size; - first->dma = dma; - tx_desc->read.buffer_addr = cpu_to_le64(dma); + tx_buffer = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); - for (;;) { while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { tx_desc->read.cmd_type_len = - cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD); + cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD); i++; tx_desc++; @@ -4248,18 +4831,18 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_desc = IGB_TX_DESC(tx_ring, 0); i = 0; } + tx_desc->read.olinfo_status = 0; dma += IGB_MAX_DATA_PER_TXD; size -= IGB_MAX_DATA_PER_TXD; - tx_desc->read.olinfo_status = 0; tx_desc->read.buffer_addr = cpu_to_le64(dma); } if (likely(!data_len)) break; - tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); i++; tx_desc++; @@ -4267,36 +4850,27 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_desc = IGB_TX_DESC(tx_ring, 0); i = 0; } + tx_desc->read.olinfo_status = 0; size = skb_frag_size(frag); data_len -= size; dma = skb_frag_dma_map(tx_ring->dev, frag, 0, - size, DMA_TO_DEVICE); - if (dma_mapping_error(tx_ring->dev, dma)) - goto dma_error; + size, DMA_TO_DEVICE); - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = size; - tx_buffer_info->dma = dma; - - tx_desc->read.olinfo_status = 0; - tx_desc->read.buffer_addr = cpu_to_le64(dma); - - frag++; + tx_buffer = &tx_ring->tx_buffer_info[i]; } - netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); - /* write last descriptor with RS and EOP bits */ - cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); - tx_desc->read.cmd_type_len = cmd_type; + cmd_type |= size | IGB_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); /* set the timestamp */ first->time_stamp = jiffies; - /* - * Force memory writes to complete before letting h/w know there + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). * @@ -4317,7 +4891,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail - * at a time, it syncronizes IO on IA64/Altix systems */ + * at a time, it synchronizes IO on IA64/Altix systems + */ mmiowb(); return; @@ -4327,9 +4902,9 @@ dma_error: /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; @@ -4347,11 +4922,13 @@ static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) /* Herbert's original patch had: * smp_mb__after_netif_stop_queue(); - * but since that doesn't exist yet, just open code it. */ + * but since that doesn't exist yet, just open code it. + */ smp_mb(); /* We need to check again in a case another CPU has just - * made room available. */ + * made room available. + */ if (igb_desc_unused(tx_ring) < size) return -EBUSY; @@ -4378,15 +4955,26 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; + u16 count = TXD_USE_COUNT(skb_headlen(skb)); __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; - /* need: 1 descriptor per page, + /* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, - * + 1 desc for skb->data, * + 1 desc for context descriptor, - * otherwise try next time */ - if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) { + * otherwise try next time + */ + if (NETDEV_FRAG_PAGE_MAX_SIZE > IGB_MAX_DATA_PER_TXD) { + unsigned short f; + + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + } else { + count += skb_shinfo(skb)->nr_frags; + } + + if (igb_maybe_stop_tx(tx_ring, count + 3)) { /* this is a hard error */ return NETDEV_TX_BUSY; } @@ -4398,10 +4986,22 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, first->gso_segs = 1; if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IGB_TX_FLAGS_TSTAMP; + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); + + if (!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS, + &adapter->state)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); + } } + skb_tx_timestamp(skb); + if (vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); @@ -4420,7 +5020,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, igb_tx_map(tx_ring, first, hdr_len); /* Make sure there is space in the ring for the next send. */ - igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); return NETDEV_TX_OK; @@ -4456,22 +5056,22 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - /* - * The minimum packet size with TCTL.PSP set is 17 so pad the skb + /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (skb->len < 17) { - if (skb_padto(skb, 17)) + if (unlikely(skb->len < 17)) { + if (skb_pad(skb, 17 - skb->len)) return NETDEV_TX_OK; skb->len = 17; + skb_set_tail_pointer(skb, 17); } return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } /** - * igb_tx_timeout - Respond to a Tx Hang - * @netdev: network interface device structure + * igb_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure **/ static void igb_tx_timeout(struct net_device *netdev) { @@ -4500,13 +5100,12 @@ static void igb_reset_task(struct work_struct *work) } /** - * igb_get_stats64 - Get System Network Statistics - * @netdev: network interface device structure - * @stats: rtnl_link_stats64 pointer - * + * igb_get_stats64 - Get System Network Statistics + * @netdev: network interface device structure + * @stats: rtnl_link_stats64 pointer **/ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) + struct rtnl_link_stats64 *stats) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -4519,11 +5118,11 @@ static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev, } /** - * igb_change_mtu - Change the Maximum Transfer Unit - * @netdev: network interface device structure - * @new_mtu: new value for maximum frame size + * igb_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size * - * Returns 0 on success, negative on failure + * Returns 0 on success, negative on failure **/ static int igb_change_mtu(struct net_device *netdev, int new_mtu) { @@ -4542,8 +5141,12 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) - msleep(1); + usleep_range(1000, 2000); /* igb_down has a dependency on max_frame_size */ adapter->max_frame_size = max_frame; @@ -4566,10 +5169,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) } /** - * igb_update_stats - Update the board statistics counters - * @adapter: board private structure + * igb_update_stats - Update the board statistics counters + * @adapter: board private structure **/ - void igb_update_stats(struct igb_adapter *adapter, struct rtnl_link_stats64 *net_stats) { @@ -4584,8 +5186,7 @@ void igb_update_stats(struct igb_adapter *adapter, #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF - /* - * Prevent stats update while adapter is being reset, or if the pci + /* Prevent stats update while adapter is being reset, or if the pci * connection is down. */ if (adapter->link_speed == 0) @@ -4595,18 +5196,24 @@ void igb_update_stats(struct igb_adapter *adapter, bytes = 0; packets = 0; + + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { - u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF; struct igb_ring *ring = adapter->rx_ring[i]; + u32 rqdpc = rd32(E1000_RQDPC(i)); + if (hw->mac.type >= e1000_i210) + wr32(E1000_RQDPC(i), 0); - ring->rx_stats.drops += rqdpc_tmp; - net_stats->rx_fifo_errors += rqdpc_tmp; + if (rqdpc) { + ring->rx_stats.drops += rqdpc; + net_stats->rx_fifo_errors += rqdpc; + } do { - start = u64_stats_fetch_begin_bh(&ring->rx_syncp); + start = u64_stats_fetch_begin_irq(&ring->rx_syncp); _bytes = ring->rx_stats.bytes; _packets = ring->rx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start)); bytes += _bytes; packets += _packets; } @@ -4619,15 +5226,16 @@ void igb_update_stats(struct igb_adapter *adapter, for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *ring = adapter->tx_ring[i]; do { - start = u64_stats_fetch_begin_bh(&ring->tx_syncp); + start = u64_stats_fetch_begin_irq(&ring->tx_syncp); _bytes = ring->tx_stats.bytes; _packets = ring->tx_stats.packets; - } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start)); + } while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start)); bytes += _bytes; packets += _packets; } net_stats->tx_bytes = bytes; net_stats->tx_packets = packets; + rcu_read_unlock(); /* read stats registers */ adapter->stats.crcerrs += rd32(E1000_CRCERRS); @@ -4690,7 +5298,11 @@ void igb_update_stats(struct igb_adapter *adapter, reg = rd32(E1000_CTRL_EXT); if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { adapter->stats.rxerrc += rd32(E1000_RXERRC); - adapter->stats.tncrs += rd32(E1000_TNCRS); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += rd32(E1000_TNCRS); } adapter->stats.tsctc += rd32(E1000_TSCTC); @@ -4713,7 +5325,8 @@ void igb_update_stats(struct igb_adapter *adapter, /* Rx Errors */ /* RLEC on some newer hardware can be incorrect so build - * our own version based on RUC and ROC */ + * our own version based on RUC and ROC + */ net_stats->rx_errors = adapter->stats.rxerrc + adapter->stats.crcerrs + adapter->stats.algnerrc + adapter->stats.ruc + adapter->stats.roc + @@ -4772,7 +5385,8 @@ static irqreturn_t igb_msix_other(int irq, void *data) adapter->stats.doosync++; /* The DMA Out of Sync is also indication of a spoof event * in IOV mode. Check the Wrong VM Behavior register to - * see if it is really a spoof event. */ + * see if it is really a spoof event. + */ igb_check_wvbr(adapter); } @@ -4787,6 +5401,17 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } + wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; @@ -4825,45 +5450,61 @@ static irqreturn_t igb_msix_ring(int irq, void *data) } #ifdef CONFIG_IGB_DCA +static void igb_update_tx_dca(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); + + if (hw->mac.type != e1000_82575) + txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT; + + /* We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | + E1000_DCA_TXCTRL_DATA_RRO_EN | + E1000_DCA_TXCTRL_DESC_DCA_EN; + + wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); +} + +static void igb_update_rx_dca(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); + + if (hw->mac.type != e1000_82575) + rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT; + + /* We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | + E1000_DCA_RXCTRL_DESC_DCA_EN; + + wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); +} + static void igb_update_dca(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; int cpu = get_cpu(); if (q_vector->cpu == cpu) goto out_no_update; - if (q_vector->tx.ring) { - int q = q_vector->tx.ring->reg_idx; - u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); - if (hw->mac.type == e1000_82575) { - dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; - dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); - } else { - dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; - dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << - E1000_DCA_TXCTRL_CPUID_SHIFT; - } - dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; - wr32(E1000_DCA_TXCTRL(q), dca_txctrl); - } - if (q_vector->rx.ring) { - int q = q_vector->rx.ring->reg_idx; - u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); - if (hw->mac.type == e1000_82575) { - dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; - dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu); - } else { - dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; - dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) << - E1000_DCA_RXCTRL_CPUID_SHIFT; - } - dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; - dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; - dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN; - wr32(E1000_DCA_RXCTRL(q), dca_rxctrl); - } + if (q_vector->tx.ring) + igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); + + if (q_vector->rx.ring) + igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); + q_vector->cpu = cpu; out_no_update: put_cpu(); @@ -4909,7 +5550,8 @@ static int __igb_notify_dca(struct device *dev, void *data) case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left - * hanging around in the sysfs model */ + * hanging around in the sysfs model + */ dca_remove_requester(dev); dev_info(&pdev->dev, "DCA disabled\n"); adapter->flags &= ~IGB_FLAG_DCA_ENABLED; @@ -4922,12 +5564,12 @@ static int __igb_notify_dca(struct device *dev, void *data) } static int igb_notify_dca(struct notifier_block *nb, unsigned long event, - void *p) + void *p) { int ret_val; ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, - __igb_notify_dca); + __igb_notify_dca); return ret_val ? NOTIFY_BAD : NOTIFY_DONE; } @@ -4937,103 +5579,14 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, static int igb_vf_configure(struct igb_adapter *adapter, int vf) { unsigned char mac_addr[ETH_ALEN]; - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pvfdev; - unsigned int device_id; - u16 thisvf_devfn; - random_ether_addr(mac_addr); + eth_zero_addr(mac_addr); igb_set_vf_mac(adapter, vf, mac_addr); - switch (adapter->hw.mac.type) { - case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) | - (pdev->devfn & 1); - break; - case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) | - (pdev->devfn & 3); - break; - default: - device_id = 0; - thisvf_devfn = 0; - break; - } - - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == thisvf_devfn) - break; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - if (pvfdev) - adapter->vf_data[vf].vfdev = pvfdev; - else - dev_err(&pdev->dev, - "Couldn't find pci dev ptr for VF %4.4x\n", - thisvf_devfn); - return pvfdev != NULL; -} + /* By default spoof check is enabled for all VFs */ + adapter->vf_data[vf].spoofchk_enabled = true; -static int igb_find_enabled_vfs(struct igb_adapter *adapter) -{ - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - struct pci_dev *pvfdev; - u16 vf_devfn = 0; - u16 vf_stride; - unsigned int device_id; - int vfs_found = 0; - - switch (adapter->hw.mac.type) { - case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - vf_stride = 2; - break; - case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - vf_stride = 4; - break; - default: - device_id = 0; - vf_stride = 0; - break; - } - - vf_devfn = pdev->devfn + 0x80; - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == vf_devfn && - (pvfdev->bus->number >= pdev->bus->number)) - vfs_found++; - vf_devfn += vf_stride; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - return vfs_found; -} - -static int igb_check_vf_assignment(struct igb_adapter *adapter) -{ - int i; - for (i = 0; i < adapter->vfs_allocated_count; i++) { - if (adapter->vf_data[i].vfdev) { - if (adapter->vf_data[i].vfdev->dev_flags & - PCI_DEV_FLAGS_ASSIGNED) - return true; - } - } - return false; + return 0; } #endif @@ -5058,7 +5611,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) struct vf_data_storage *vf_data = &adapter->vf_data[vf]; vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | - IGB_VF_FLAG_MULTI_PROMISC); + IGB_VF_FLAG_MULTI_PROMISC); vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { @@ -5066,8 +5619,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; } else { - /* - * if we have hashes and we are clearing a multicast promisc + /* if we have hashes and we are clearing a multicast promisc * flag we need to write the hashes to the MTA as this step * was previously skipped */ @@ -5075,6 +5627,7 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) vmolr |= E1000_VMOLR_MPME; } else if (vf_data->num_vf_mc_hashes) { int j; + vmolr |= E1000_VMOLR_ROMPE; for (j = 0; j < vf_data->num_vf_mc_hashes; j++) igb_mta_set(hw, vf_data->vf_mc_hashes[j]); @@ -5088,7 +5641,6 @@ static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) return -EINVAL; return 0; - } static int igb_set_vf_multicasts(struct igb_adapter *adapter, @@ -5127,6 +5679,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter) for (i = 0; i < adapter->vfs_allocated_count; i++) { u32 vmolr = rd32(E1000_VMOLR(i)); + vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); vf_data = &adapter->vf_data[i]; @@ -5225,6 +5778,7 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) if (!adapter->vf_data[vf].vlans_enabled) { u32 size; + reg = rd32(E1000_VMOLR(vf)); size = reg & E1000_VMOLR_RLPML_MASK; size += 4; @@ -5253,6 +5807,7 @@ static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) adapter->vf_data[vf].vlans_enabled--; if (!adapter->vf_data[vf].vlans_enabled) { u32 size; + reg = rd32(E1000_VMOLR(vf)); size = reg & E1000_VMOLR_RLPML_MASK; size -= 4; @@ -5295,30 +5850,91 @@ static int igb_ndo_set_vf_vlan(struct net_device *netdev, "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); if (test_bit(__IGB_DOWN, &adapter->state)) { dev_warn(&adapter->pdev->dev, - "The VF VLAN has been set," - " but the PF device is not up.\n"); + "The VF VLAN has been set, but the PF device is not up.\n"); dev_warn(&adapter->pdev->dev, - "Bring the PF device up before" - " attempting to use the VF device.\n"); + "Bring the PF device up before attempting to use the VF device.\n"); } } else { igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, - false, vf); + false, vf); igb_set_vmvir(adapter, vlan, vf); igb_set_vmolr(adapter, vf, true); adapter->vf_data[vf].pf_vlan = 0; adapter->vf_data[vf].pf_qos = 0; - } + } out: - return err; + return err; +} + +static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + u32 reg; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = rd32(E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (i >= E1000_VLVF_ARRAY_SIZE) + i = -1; + + return i; } static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) { + struct e1000_hw *hw = &adapter->hw; int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int err = 0; + + /* If in promiscuous mode we need to make sure the PF also has + * the VLAN filter set. + */ + if (add && (adapter->netdev->flags & IFF_PROMISC)) + err = igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + if (err) + goto out; + + err = igb_vlvf_set(adapter, vid, add, vf); + + if (err) + goto out; + + /* Go through all the checks to see if the VLAN filter should + * be wiped completely. + */ + if (!add && (adapter->netdev->flags & IFF_PROMISC)) { + u32 vlvf, bits; + int regndx = igb_find_vlvf_entry(adapter, vid); + + if (regndx < 0) + goto out; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + vlvf = bits = rd32(E1000_VLVF(regndx)); + bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); + /* If the filter was removed then ensure PF pool bit + * is cleared if the PF only added itself to the pool + * because the PF is in promiscuous mode. + */ + if ((vlvf & VLAN_VID_MASK) == vid && + !test_bit(vid, adapter->active_vlans) && + !bits) + igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + } - return igb_vlvf_set(adapter, vid, add, vf); +out: + return err; } static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) @@ -5350,9 +5966,9 @@ static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) { unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; - /* generate a new mac address as we were hotplug removed/added */ + /* clear mac address as we were hotplug removed/added */ if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) - random_ether_addr(vf_mac); + eth_zero_addr(vf_mac); /* process remaining reset events */ igb_vf_reset(adapter, vf); @@ -5382,14 +5998,13 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) /* reply to reset with ack and vf mac address */ msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, 6); + memcpy(addr, vf_mac, ETH_ALEN); igb_write_mbx(hw, msgbuf, 3, vf); } static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) { - /* - * The VF MAC Address is stored in a packed array of bytes + /* The VF MAC Address is stored in a packed array of bytes * starting at the second 32 bit word of the msg array */ unsigned char *addr = (char *)&msg[1]; @@ -5438,11 +6053,9 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) return; - /* - * until the vf completes a reset it should not be + /* until the vf completes a reset it should not be * allowed to start any configuration. */ - if (msgbuf[0] == E1000_VF_RESET) { igb_vf_reset_msg(adapter, vf); return; @@ -5462,9 +6075,8 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); else dev_warn(&pdev->dev, - "VF %d attempted to override administratively " - "set MAC address\nReload the VF driver to " - "resume operations\n", vf); + "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n", + vf); break; case E1000_VF_SET_PROMISC: retval = igb_set_vf_promisc(adapter, msgbuf, vf); @@ -5479,9 +6091,8 @@ static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) retval = -1; if (vf_data->pf_vlan) dev_warn(&pdev->dev, - "VF %d attempted to override administratively " - "set VLAN tag\nReload the VF driver to " - "resume operations\n", vf); + "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n", + vf); else retval = igb_set_vf_vlan(adapter, msgbuf, vf); break; @@ -5550,9 +6161,9 @@ static void igb_set_uta(struct igb_adapter *adapter) } /** - * igb_intr_msi - Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure + * igb_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure **/ static irqreturn_t igb_intr_msi(int irq, void *data) { @@ -5578,15 +6189,26 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } + napi_schedule(&q_vector->napi); return IRQ_HANDLED; } /** - * igb_intr - Legacy Interrupt Handler - * @irq: interrupt number - * @data: pointer to a network interface device structure + * igb_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure **/ static irqreturn_t igb_intr(int irq, void *data) { @@ -5594,11 +6216,13 @@ static irqreturn_t igb_intr(int irq, void *data) struct igb_q_vector *q_vector = adapter->q_vector[0]; struct e1000_hw *hw = &adapter->hw; /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No - * need for the IMC write */ + * need for the IMC write + */ u32 icr = rd32(E1000_ICR); /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is - * not set, then the adapter didn't send an interrupt */ + * not set, then the adapter didn't send an interrupt + */ if (!(icr & E1000_ICR_INT_ASSERTED)) return IRQ_NONE; @@ -5619,6 +6243,17 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5638,7 +6273,7 @@ static void igb_ring_irq_enable(struct igb_q_vector *q_vector) } if (!test_bit(__IGB_DOWN, &adapter->state)) { - if (adapter->msix_entries) + if (adapter->flags & IGB_FLAG_HAS_MSIX) wr32(E1000_EIMS, q_vector->eims_value); else igb_irq_enable(adapter); @@ -5646,15 +6281,15 @@ static void igb_ring_irq_enable(struct igb_q_vector *q_vector) } /** - * igb_poll - NAPI Rx polling callback - * @napi: napi polling structure - * @budget: count of how many packets we should handle + * igb_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle **/ static int igb_poll(struct napi_struct *napi, int budget) { struct igb_q_vector *q_vector = container_of(napi, - struct igb_q_vector, - napi); + struct igb_q_vector, + napi); bool clean_complete = true; #ifdef CONFIG_IGB_DCA @@ -5679,74 +6314,17 @@ static int igb_poll(struct napi_struct *napi, int budget) } /** - * igb_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @shhwtstamps: timestamp structure to update - * @regval: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions - */ -static void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *shhwtstamps, - u64 regval) -{ - u64 ns; - - /* - * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to - * 24 to match clock shift we setup earlier. - */ - if (adapter->hw.mac.type >= e1000_82580) - regval <<= IGB_82580_TSYNC_SHIFT; - - ns = timecounter_cyc2time(&adapter->clock, regval); - timecompare_update(&adapter->compare, ns); - memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps)); - shhwtstamps->hwtstamp = ns_to_ktime(ns); - shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns); -} - -/** - * igb_tx_hwtstamp - utility function which checks for TX time stamp - * @q_vector: pointer to q_vector containing needed info - * @buffer: pointer to igb_tx_buffer structure + * igb_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. - */ -static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, - struct igb_tx_buffer *buffer_info) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || - !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) - return; - - regval = rd32(E1000_TXSTMPL); - regval |= (u64)rd32(E1000_TXSTMPH) << 32; - - igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(buffer_info->skb, &shhwtstamps); -} - -/** - * igb_clean_tx_irq - Reclaim resources after transmit completes - * @q_vector: pointer to q_vector containing needed info - * returns true if ring is completely cleaned + * returns true if ring is completely cleaned **/ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx.ring; struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; unsigned int i = tx_ring->next_to_clean; @@ -5758,16 +6336,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_desc = IGB_TX_DESC(tx_ring, i); i -= tx_ring->count; - for (; budget; budget--) { - eop_desc = tx_buffer->next_to_watch; - - /* prevent any other reads prior to eop_desc */ - rmb(); + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) break; @@ -5779,23 +6357,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; - /* retrieve hardware timestamp */ - igb_tx_hwtstamp(q_vector, tx_buffer); - /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; /* unmap skb header data */ dma_unmap_single(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { - tx_buffer->dma = 0; - tx_buffer++; tx_desc++; i++; @@ -5806,17 +6382,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } /* unmap any remaining paged data */ - if (tx_buffer->dma) { + if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); } } - /* clear last DMA location */ - tx_buffer->dma = 0; - /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; tx_desc++; @@ -5826,7 +6400,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } - } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -5842,12 +6422,11 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct e1000_hw *hw = &adapter->hw; - eop_desc = tx_buffer->next_to_watch; - /* Detect a transmit hang in hardware, this serializes the - * check with the clearing of time_stamp and movement of i */ + * check with the clearing of time_stamp and movement of i + */ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - if (eop_desc && + if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5871,9 +6450,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_ring->next_to_use, tx_ring->next_to_clean, tx_buffer->time_stamp, - eop_desc, + tx_buffer->next_to_watch, jiffies, - eop_desc->wb.status); + tx_buffer->next_to_watch->wb.status); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); @@ -5882,9 +6461,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } } +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && - netif_carrier_ok(tx_ring->netdev) && - igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + netif_carrier_ok(tx_ring->netdev) && + igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { /* Make sure that anybody stopping the queue after this * sees the new next_to_clean. */ @@ -5904,6 +6484,184 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) return !!budget; } +/** + * igb_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void igb_reuse_rx_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *old_buff) +{ + struct igb_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + *new_buff = *old_buff; + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, + old_buff->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); +} + +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + struct page *page, + unsigned int truesize) +{ + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= IGB_RX_BUFSZ; + + /* since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unnecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +/** + * igb_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + **/ +static bool igb_add_rx_frag(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = IGB_RX_BUFSZ; +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); +#endif + + if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + return igb_can_reuse_rx_page(rx_buffer, page, truesize); +} + +static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_rx_buffer *rx_buffer; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + + page = rx_buffer->page; + prefetchw(page); + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IGB_RX_HDR_LEN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_failed++; + return NULL; + } + + /* we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + } + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* pull page into skb */ + if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + + return skb; +} + static inline void igb_rx_checksum(struct igb_ring *ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -5922,8 +6680,7 @@ static inline void igb_rx_checksum(struct igb_ring *ring, if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) { - /* - * work around errata with sctp packets where the TCPE aka + /* work around errata with sctp packets where the TCPE aka * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) * packets, (aka let the stack check the crc32c) */ @@ -5950,186 +6707,351 @@ static inline void igb_rx_hash(struct igb_ring *ring, struct sk_buff *skb) { if (ring->netdev->features & NETIF_F_RXHASH) - skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + skb_set_hash(skb, + le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); } -static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) +/** + * igb_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * @skb: current socket buffer containing buffer in progress + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool igb_is_non_eop(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc) { - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; + u32 ntc = rx_ring->next_to_clean + 1; - if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | - E1000_RXDADV_STAT_TS)) - return; + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. + prefetch(IGB_RX_DESC(rx_ring, ntc)); + + if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * igb_get_headlen - determine size of header for LRO/GRO + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in + * + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, and GRO offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. + **/ +static unsigned int igb_get_headlen(unsigned char *data, + unsigned int max_len) +{ + union { + unsigned char *network; + /* l2 headers */ + struct ethhdr *eth; + struct vlan_hdr *vlan; + /* l3 headers */ + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } hdr; + __be16 protocol; + u8 nexthdr = 0; /* default to not TCP */ + u8 hlen; + + /* this should never happen, but better safe than sorry */ + if (max_len < ETH_HLEN) + return max_len; + + /* initialize network frame pointer */ + hdr.network = data; + + /* set first protocol and move network header forward */ + protocol = hdr.eth->h_proto; + hdr.network += ETH_HLEN; + + /* handle any vlan tag if present */ + if (protocol == htons(ETH_P_8021Q)) { + if ((hdr.network - data) > (max_len - VLAN_HLEN)) + return max_len; + + protocol = hdr.vlan->h_vlan_encapsulated_proto; + hdr.network += VLAN_HLEN; + } + + /* handle L3 protocols */ + if (protocol == htons(ETH_P_IP)) { + if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) + return max_len; + + /* access ihl as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct iphdr)) + return hdr.network - data; + + /* record next protocol if header is present */ + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) + nexthdr = hdr.ipv4->protocol; + } else if (protocol == htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hlen = sizeof(struct ipv6hdr); + } else { + return hdr.network - data; + } + + /* relocate pointer to start of L4 header */ + hdr.network += hlen; + + /* finally sort out TCP */ + if (nexthdr == IPPROTO_TCP) { + if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) + return max_len; + + /* access doff as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[12] & 0xF0) >> 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct tcphdr)) + return hdr.network - data; + + hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); + } + + /* If everything has gone correctly hdr.network should be the + * data section of the packet and will be the end of the header. + * If not then it probably represents the end of the last recognized + * header. */ + if ((hdr.network - data) < max_len) + return hdr.network - data; + else + return max_len; +} + +/** + * igb_pull_tail - igb specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being adjusted + * + * This function is an igb specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void igb_pull_tail(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - u32 *stamp = (u32 *)skb->data; - regval = le32_to_cpu(*(stamp + 2)); - regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; - skb_pull(skb, IGB_TS_HDR_LEN); - } else { - if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; + /* retrieve timestamp from buffer */ + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + + /* update pointers to remove timestamp header */ + skb_frag_size_sub(frag, IGB_TS_HDR_LEN); + frag->page_offset += IGB_TS_HDR_LEN; + skb->data_len -= IGB_TS_HDR_LEN; + skb->len -= IGB_TS_HDR_LEN; - regval = rd32(E1000_RXSTMPL); - regval |= (u64)rd32(E1000_RXSTMPH) << 32; + /* move va to start of packet data */ + va += IGB_TS_HDR_LEN; } - igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); + /* we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; } -static void igb_rx_vlan(struct igb_ring *ring, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) +/** + * igb_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool igb_cleanup_headers(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + igb_pull_tail(rx_ring, rx_desc, skb); + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/** + * igb_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + **/ +static void igb_process_skb_fields(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { + struct net_device *dev = rx_ring->netdev; + + igb_rx_hash(rx_ring, rx_desc, skb); + + igb_rx_checksum(rx_ring, rx_desc, skb); + + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) && + !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) + igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && + igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { u16 vid; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && - test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags)) + test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) vid = be16_to_cpu(rx_desc->wb.upper.vlan); else vid = le16_to_cpu(rx_desc->wb.upper.vlan); - __vlan_hwaccel_put_tag(skb, vid); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); } -} -static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc) -{ - /* HW will not DMA in data larger than the given buffer, even if it - * parses the (NFS, of course) header to be larger. In that case, it - * fills the header buffer and spills the rest into the page. - */ - u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) & - E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT; - if (hlen > IGB_RX_HDR_LEN) - hlen = IGB_RX_HDR_LEN; - return hlen; + skb_record_rx_queue(skb, rx_ring->queue_index); + + skb->protocol = eth_type_trans(skb, rx_ring->netdev); } -static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { struct igb_ring *rx_ring = q_vector->rx.ring; - union e1000_adv_rx_desc *rx_desc; - const int current_node = numa_node_id(); + struct sk_buff *skb = rx_ring->skb; unsigned int total_bytes = 0, total_packets = 0; u16 cleaned_count = igb_desc_unused(rx_ring); - u16 i = rx_ring->next_to_clean; - rx_desc = IGB_RX_DESC(rx_ring, i); - - while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { - struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; - struct sk_buff *skb = buffer_info->skb; - union e1000_adv_rx_desc *next_rxd; + while (likely(total_packets < budget)) { + union e1000_adv_rx_desc *rx_desc; - buffer_info->skb = NULL; - prefetch(skb->data); + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGB_RX_BUFFER_WRITE) { + igb_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } - i++; - if (i == rx_ring->count) - i = 0; + rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); - next_rxd = IGB_RX_DESC(rx_ring, i); - prefetch(next_rxd); + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; - /* - * This memory barrier is needed to keep us from reading + /* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the * RXD_STAT_DD bit is set */ rmb(); - if (!skb_is_nonlinear(skb)) { - __skb_put(skb, igb_get_hlen(rx_desc)); - dma_unmap_single(rx_ring->dev, buffer_info->dma, - IGB_RX_HDR_LEN, - DMA_FROM_DEVICE); - buffer_info->dma = 0; - } - - if (rx_desc->wb.upper.length) { - u16 length = le16_to_cpu(rx_desc->wb.upper.length); - - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - buffer_info->page, - buffer_info->page_offset, - length); + /* retrieve a buffer from the ring */ + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); - skb->len += length; - skb->data_len += length; - skb->truesize += PAGE_SIZE / 2; - - if ((page_count(buffer_info->page) != 1) || - (page_to_nid(buffer_info->page) != current_node)) - buffer_info->page = NULL; - else - get_page(buffer_info->page); + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; - dma_unmap_page(rx_ring->dev, buffer_info->page_dma, - PAGE_SIZE / 2, DMA_FROM_DEVICE); - buffer_info->page_dma = 0; - } + cleaned_count++; - if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) { - struct igb_rx_buffer *next_buffer; - next_buffer = &rx_ring->rx_buffer_info[i]; - buffer_info->skb = next_buffer->skb; - buffer_info->dma = next_buffer->dma; - next_buffer->skb = skb; - next_buffer->dma = 0; - goto next_desc; - } + /* fetch next buffer in frame if non-eop */ + if (igb_is_non_eop(rx_ring, rx_desc)) + continue; - if (igb_test_staterr(rx_desc, - E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { - dev_kfree_skb_any(skb); - goto next_desc; + /* verify the packet layout is correct */ + if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; } - igb_rx_hwtstamp(q_vector, rx_desc, skb); - igb_rx_hash(rx_ring, rx_desc, skb); - igb_rx_checksum(rx_ring, rx_desc, skb); - igb_rx_vlan(rx_ring, rx_desc, skb); - + /* probably a little skewed due to removing CRC */ total_bytes += skb->len; - total_packets++; - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); napi_gro_receive(&q_vector->napi, skb); - budget--; -next_desc: - if (!budget) - break; + /* reset skb pointer */ + skb = NULL; - cleaned_count++; - /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IGB_RX_BUFFER_WRITE) { - igb_alloc_rx_buffers(rx_ring, cleaned_count); - cleaned_count = 0; - } - - /* use prefetched values */ - rx_desc = next_rxd; + /* update budget accounting */ + total_packets++; } - rx_ring->next_to_clean = i; + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + u64_stats_update_begin(&rx_ring->rx_syncp); rx_ring->rx_stats.packets += total_packets; rx_ring->rx_stats.bytes += total_bytes; @@ -6140,79 +7062,49 @@ next_desc: if (cleaned_count) igb_alloc_rx_buffers(rx_ring, cleaned_count); - return !!budget; -} - -static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, - struct igb_rx_buffer *bi) -{ - struct sk_buff *skb = bi->skb; - dma_addr_t dma = bi->dma; - - if (dma) - return true; - - if (likely(!skb)) { - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IGB_RX_HDR_LEN); - bi->skb = skb; - if (!skb) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - /* initialize skb for ring */ - skb_record_rx_queue(skb, rx_ring->queue_index); - } - - dma = dma_map_single(rx_ring->dev, skb->data, - IGB_RX_HDR_LEN, DMA_FROM_DEVICE); - - if (dma_mapping_error(rx_ring->dev, dma)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } - - bi->dma = dma; - return true; + return total_packets < budget; } static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, struct igb_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t page_dma = bi->page_dma; - unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2); + dma_addr_t dma; - if (page_dma) + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) return true; - if (!page) { - page = alloc_page(GFP_ATOMIC | __GFP_COLD); - bi->page = page; - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_failed++; - return false; - } + /* alloc new page for storage */ + page = __skb_alloc_page(GFP_ATOMIC | __GFP_COLD, NULL); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; } - page_dma = dma_map_page(rx_ring->dev, page, - page_offset, PAGE_SIZE / 2, - DMA_FROM_DEVICE); + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); - if (dma_mapping_error(rx_ring->dev, page_dma)) { rx_ring->rx_stats.alloc_failed++; return false; } - bi->page_dma = page_dma; - bi->page_offset = page_offset; + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + return true; } /** - * igb_alloc_rx_buffers - Replace used receive buffers; packet split - * @adapter: address of board private structure + * igb_alloc_rx_buffers - Replace used receive buffers; packet split + * @adapter: address of board private structure **/ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { @@ -6220,22 +7112,22 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) struct igb_rx_buffer *bi; u16 i = rx_ring->next_to_use; + /* nothing to do */ + if (!cleaned_count) + return; + rx_desc = IGB_RX_DESC(rx_ring, i); bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; - while (cleaned_count--) { - if (!igb_alloc_mapped_skb(rx_ring, bi)) - break; - - /* Refresh the desc even if buffer_addrs didn't change - * because each write-back erases this info. */ - rx_desc->read.hdr_addr = cpu_to_le64(bi->dma); - + do { if (!igb_alloc_mapped_page(rx_ring, bi)) break; - rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma); + /* Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); rx_desc++; bi++; @@ -6248,17 +7140,24 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) /* clear the hdr_addr for the next_to_use descriptor */ rx_desc->read.hdr_addr = 0; - } + + cleaned_count--; + } while (cleaned_count); i += rx_ring->count; if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ rx_ring->next_to_use = i; + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, - * such as IA-64). */ + * such as IA-64). + */ wmb(); writel(i, rx_ring->tail); } @@ -6284,7 +7183,7 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) break; case SIOCGMIIREG: if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, - &data->val_out)) + &data->val_out)) return -EIO; break; case SIOCSMIIREG: @@ -6295,181 +7194,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** - * igb_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". - * - **/ -static int igb_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * register TSYNCRXCFG must be set, therefore it is not - * possible to time stamp both Sync and Delay_Req messages - * => fall back to time stamping all packets - */ - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - default: - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. - */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - } - - /* enable/disable TX */ - regval = rd32(E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - wr32(E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = rd32(E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - wr32(E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - wr32(E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - wr32(E1000_ETQF(3), 0); - -#define PTP_PORT 319 - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - wr32(E1000_IMIR(3), htons(PTP_PORT)); - wr32(E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - wr32(E1000_SPQF(3), htons(PTP_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - wr32(E1000_FTQF(3), ftqf); - } else { - wr32(E1000_FTQF(3), E1000_FTQF_MASK); - } - wrfl(); - - adapter->hwtstamp_config = config; - - /* clear TX/RX time stamp registers, just to be sure */ - regval = rd32(E1000_TXSTMPH); - regval = rd32(E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -/** * igb_ioctl - * @netdev: * @ifreq: @@ -6482,38 +7206,46 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); + case SIOCGHWTSTAMP: + return igb_ptp_get_ts_config(netdev, ifr); case SIOCSHWTSTAMP: - return igb_hwtstamp_ioctl(netdev, ifr, cmd); + return igb_ptp_set_ts_config(netdev, ifr); default: return -EOPNOTSUPP; } } +void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, value); +} + +void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, *value); +} + s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = adapter->pdev->pcie_cap; - if (!cap_offset) + if (pcie_capability_read_word(adapter->pdev, reg, value)) return -E1000_ERR_CONFIG; - pci_read_config_word(adapter->pdev, cap_offset + reg, value); - return 0; } s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = adapter->pdev->pcie_cap; - if (!cap_offset) + if (pcie_capability_write_word(adapter->pdev, reg, *value)) return -E1000_ERR_CONFIG; - pci_write_config_word(adapter->pdev, cap_offset + reg, *value); - return 0; } @@ -6522,7 +7254,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; u32 ctrl, rctl; - bool enable = !!(features & NETIF_F_HW_VLAN_RX); + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); if (enable) { /* enable VLAN tag insert/strip */ @@ -6544,7 +7276,8 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features) igb_rlpml_set(adapter); } -static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6561,7 +7294,8 @@ static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) return 0; } -static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +static int igb_vlan_rx_kill_vid(struct net_device *netdev, + __be16 proto, u16 vid) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -6587,7 +7321,7 @@ static void igb_restore_vlan(struct igb_adapter *adapter) igb_vlan_mode(adapter->netdev, adapter->netdev->features); for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - igb_vlan_rx_add_vid(adapter->netdev, vid); + igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid); } int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) @@ -6598,15 +7332,24 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) mac->autoneg = 0; /* Make sure dplx is at most 1 bit and lsb of speed is not set - * for the switch() below to work */ + * for the switch() below to work + */ if ((spd & 1) || (dplx & ~1)) goto err_inval; - /* Fiber NIC's only allow 1000 Gbps Full duplex */ - if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) && - spd != SPEED_1000 && - dplx != DUPLEX_FULL) - goto err_inval; + /* Fiber NIC's only allow 1000 gbps Full duplex + * and 100Mbps Full duplex for 100baseFx sfp + */ + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + switch (spd + dplx) { + case SPEED_10 + DUPLEX_HALF: + case SPEED_10 + DUPLEX_FULL: + case SPEED_100 + DUPLEX_HALF: + goto err_inval; + default: + break; + } + } switch (spd + dplx) { case SPEED_10 + DUPLEX_HALF: @@ -6629,6 +7372,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: @@ -6701,7 +7448,8 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, igb_power_up_link(adapter); /* Release control of h/w to f/w. If f/w is AMT enabled, this - * would have already happened in close and is redundant. */ + * would have already happened in close and is redundant. + */ igb_release_hw_control(adapter); pci_disable_device(pdev); @@ -6710,6 +7458,7 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, } #ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int igb_suspend(struct device *dev) { int retval; @@ -6729,6 +7478,7 @@ static int igb_suspend(struct device *dev) return 0; } +#endif /* CONFIG_PM_SLEEP */ static int igb_resume(struct device *dev) { @@ -6753,18 +7503,7 @@ static int igb_resume(struct device *dev) pci_enable_wake(pdev, PCI_D3hot, 0); pci_enable_wake(pdev, PCI_D3cold, 0); - if (!rtnl_is_locked()) { - /* - * shut up ASSERT_RTNL() warning in - * netif_set_real_num_tx/rx_queues. - */ - rtnl_lock(); - err = igb_init_interrupt_scheme(adapter); - rtnl_unlock(); - } else { - err = igb_init_interrupt_scheme(adapter); - } - if (err) { + if (igb_init_interrupt_scheme(adapter, true)) { dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); return -ENOMEM; } @@ -6772,13 +7511,16 @@ static int igb_resume(struct device *dev) igb_reset(adapter); /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. + */ igb_get_hw_control(adapter); wr32(E1000_WUS, ~0); if (netdev->flags & IFF_UP) { + rtnl_lock(); err = __igb_open(netdev, true); + rtnl_unlock(); if (err) return err; } @@ -6839,9 +7581,76 @@ static void igb_shutdown(struct pci_dev *pdev) } } +#ifdef CONFIG_PCI_IOV +static int igb_sriov_reinit(struct pci_dev *dev) +{ + struct net_device *netdev = pci_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + + rtnl_lock(); + + if (netif_running(netdev)) + igb_close(netdev); + else + igb_reset(adapter); + + igb_clear_interrupt_scheme(adapter); + + igb_init_queue_configuration(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + igb_open(netdev); + + rtnl_unlock(); + + return 0; +} + +static int igb_pci_disable_sriov(struct pci_dev *dev) +{ + int err = igb_disable_sriov(dev); + + if (!err) + err = igb_sriov_reinit(dev); + + return err; +} + +static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs) +{ + int err = igb_enable_sriov(dev, num_vfs); + + if (err) + goto out; + + err = igb_sriov_reinit(dev); + if (!err) + return num_vfs; + +out: + return err; +} + +#endif +static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs) +{ +#ifdef CONFIG_PCI_IOV + if (num_vfs == 0) + return igb_pci_disable_sriov(dev); + else + return igb_pci_enable_sriov(dev, num_vfs); +#endif + return 0; +} + #ifdef CONFIG_NET_POLL_CONTROLLER -/* - * Polling 'interrupt' - used by things like netconsole to send skbs +/* Polling 'interrupt' - used by things like netconsole to send skbs * without having to re-enable interrupts. It's not called while * the interrupt routine is executing. */ @@ -6854,7 +7663,7 @@ static void igb_netpoll(struct net_device *netdev) for (i = 0; i < adapter->num_q_vectors; i++) { q_vector = adapter->q_vector[i]; - if (adapter->msix_entries) + if (adapter->flags & IGB_FLAG_HAS_MSIX) wr32(E1000_EIMC, q_vector->eims_value); else igb_irq_disable(adapter); @@ -6864,13 +7673,13 @@ static void igb_netpoll(struct net_device *netdev) #endif /* CONFIG_NET_POLL_CONTROLLER */ /** - * igb_io_error_detected - called when PCI error is detected - * @pdev: Pointer to PCI device - * @state: The current pci connection state + * igb_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state * - * This function is called after a PCI bus error affecting - * this device has been detected. - */ + * This function is called after a PCI bus error affecting + * this device has been detected. + **/ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { @@ -6891,12 +7700,12 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, } /** - * igb_io_slot_reset - called after the pci bus has been reset. - * @pdev: Pointer to PCI device + * igb_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device * - * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. - */ + * Restart the card from scratch, as if from a cold-boot. Implementation + * resembles the first-half of the igb_resume routine. + **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -6924,8 +7733,9 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) err = pci_cleanup_aer_uncorrect_error_status(pdev); if (err) { - dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status " - "failed 0x%0x\n", err); + dev_err(&pdev->dev, + "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n", + err); /* non-fatal, continue */ } @@ -6933,12 +7743,12 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) } /** - * igb_io_resume - called when traffic can start flowing again. - * @pdev: Pointer to PCI device + * igb_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device * - * This callback is called when the error recovery driver tells us that - * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. Implementation resembles the + * second-half of the igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { @@ -6955,12 +7765,13 @@ static void igb_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); /* let the f/w know that the h/w is now under the control of the - * driver. */ + * driver. + */ igb_get_hw_control(adapter); } static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, - u8 qsel) + u8 qsel) { u32 rar_low, rar_high; struct e1000_hw *hw = &adapter->hw; @@ -6969,7 +7780,7 @@ static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, * from network order (big endian) to little endian */ rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | - ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); /* Indicate to hardware the Address is Valid. */ @@ -6987,11 +7798,12 @@ static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index, } static int igb_set_vf_mac(struct igb_adapter *adapter, - int vf, unsigned char *mac_addr) + int vf, unsigned char *mac_addr) { struct e1000_hw *hw = &adapter->hw; /* VF MAC addresses start at end of receive addresses and moves - * torwards the first, as a result a collision should not be possible */ + * towards the first, as a result a collision should not be possible + */ int rar_entry = hw->mac.rar_entry_count - (vf + 1); memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); @@ -7008,13 +7820,13 @@ static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) return -EINVAL; adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); - dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" - " change effective."); + dev_info(&adapter->pdev->dev, + "Reload the VF driver to make this change effective."); if (test_bit(__IGB_DOWN, &adapter->state)) { - dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," - " but the PF device is not up.\n"); - dev_warn(&adapter->pdev->dev, "Bring the PF device up before" - " attempting to use the VF device.\n"); + dev_warn(&adapter->pdev->dev, + "The VF MAC address has been set, but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before attempting to use the VF device.\n"); } return igb_set_vf_mac(adapter, vf, mac); } @@ -7041,17 +7853,22 @@ static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, /* Calculate the rate factor values to set */ rf_int = link_speed / tx_rate; rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; + rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) / + tx_rate; bcnrc_val = E1000_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) & - E1000_RTTBCNRC_RF_INT_MASK); + bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) & + E1000_RTTBCNRC_RF_INT_MASK); bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK); } else { bcnrc_val = 0; } wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */ + /* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM + * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported. + */ + wr32(E1000_RTTBCNRM, 0x14); wr32(E1000_RTTBCNRC, bcnrc_val); } @@ -7070,8 +7887,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) reset_rate = true; adapter->vf_rate_link_speed = 0; dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit " - "rate is disabled\n"); + "Link speed has been changed. VF Transmit rate is disabled\n"); } for (i = 0; i < adapter->vfs_allocated_count; i++) { @@ -7079,12 +7895,13 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter) adapter->vf_data[i].tx_rate = 0; igb_set_vf_rate_limit(&adapter->hw, i, - adapter->vf_data[i].tx_rate, - actual_link_speed); + adapter->vf_data[i].tx_rate, + actual_link_speed); } } -static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) { struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; @@ -7093,19 +7910,50 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) if (hw->mac.type != e1000_82576) return -EOPNOTSUPP; + if (min_tx_rate) + return -EINVAL; + actual_link_speed = igb_link_mbps(adapter->link_speed); if ((vf >= adapter->vfs_allocated_count) || (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) || - (tx_rate < 0) || (tx_rate > actual_link_speed)) + (max_tx_rate < 0) || + (max_tx_rate > actual_link_speed)) return -EINVAL; adapter->vf_rate_link_speed = actual_link_speed; - adapter->vf_data[vf].tx_rate = (u16)tx_rate; - igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + adapter->vf_data[vf].tx_rate = (u16)max_tx_rate; + igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed); return 0; } +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 reg_val, reg_offset; + + if (!adapter->vfs_allocated_count) + return -EOPNOTSUPP; + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC; + reg_val = rd32(reg_offset); + if (setting) + reg_val |= ((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + else + reg_val &= ~((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + wr32(reg_offset, reg_val); + + adapter->vf_data[vf].spoofchk_enabled = setting; + return 0; +} + static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi) { @@ -7114,9 +7962,11 @@ static int igb_ndo_get_vf_config(struct net_device *netdev, return -EINVAL; ivi->vf = vf; memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); - ivi->tx_rate = adapter->vf_data[vf].tx_rate; + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; ivi->vlan = adapter->vf_data[vf].pf_vlan; ivi->qos = adapter->vf_data[vf].pf_qos; + ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; return 0; } @@ -7127,6 +7977,9 @@ static void igb_vmm_control(struct igb_adapter *adapter) switch (hw->mac.type) { case e1000_82575: + case e1000_i210: + case e1000_i211: + case e1000_i354: default: /* replication is not supported for 82575 */ return; @@ -7135,11 +7988,13 @@ static void igb_vmm_control(struct igb_adapter *adapter) reg = rd32(E1000_DTXCTL); reg |= E1000_DTXCTL_VLAN_ADDED; wr32(E1000_DTXCTL, reg); + /* Fall through */ case e1000_82580: /* enable replication vlan tag stripping */ reg = rd32(E1000_RPLOLR); reg |= E1000_RPLOLR_STRVLAN; wr32(E1000_RPLOLR, reg); + /* Fall through */ case e1000_i350: /* none of the above registers are supported by i350 */ break; @@ -7149,7 +8004,7 @@ static void igb_vmm_control(struct igb_adapter *adapter) igb_vmdq_set_loopback_pf(hw, true); igb_vmdq_set_replication_pf(hw, true); igb_vmdq_set_anti_spoofing_pf(hw, true, - adapter->vfs_allocated_count); + adapter->vfs_allocated_count); } else { igb_vmdq_set_loopback_pf(hw, false); igb_vmdq_set_replication_pf(hw, false); @@ -7169,8 +8024,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* force threshold to 0. */ wr32(E1000_DMCTXTH, 0); - /* - * DMA Coalescing high water mark needs to be greater + /* DMA Coalescing high water mark needs to be greater * than the Rx threshold. Set hwm to PBA - max frame * size in 16B units, capping it at PBA - 6KB. */ @@ -7183,8 +8037,7 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) & E1000_FCRTC_RTH_COAL_MASK); wr32(E1000_FCRTC, reg); - /* - * Set the DMA Coalescing Rx threshold to PBA - 2 * max + /* Set the DMA Coalescing Rx threshold to PBA - 2 * max * frame size, capping it at PBA - 10KB. */ dmac_thr = pba - adapter->max_frame_size / 512; @@ -7200,10 +8053,14 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) /* watchdog timer= +-1000 usec in 32usec intervals */ reg |= (1000 >> 5); + + /* Disable BMC-to-OS Watchdog Enable */ + if (hw->mac.type != e1000_i354) + reg &= ~E1000_DMACR_DC_BMC2OSW_EN; + wr32(E1000_DMACR, reg); - /* - * no lower threshold to disable + /* no lower threshold to disable * coalescing(smart fifb)-UTRESH=0 */ wr32(E1000_DMCRTRH, 0); @@ -7212,15 +8069,13 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) wr32(E1000_DMCTLX, reg); - /* - * free space in tx packet buffer to wake from + /* free space in tx packet buffer to wake from * DMA coal */ wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); - /* - * make low power state decision controlled + /* make low power state decision controlled * by DMA coal */ reg = rd32(E1000_PCIEMISC); @@ -7229,9 +8084,101 @@ static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) } /* endif adapter->dmac is not disabled */ } else if (hw->mac.type == e1000_82580) { u32 reg = rd32(E1000_PCIEMISC); + wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION); wr32(E1000_DMACR, 0); } } +/** + * igb_read_i2c_byte - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @dev_addr: device address + * @data: value read + * + * Performs byte read operation over I2C interface at + * a specified device address. + **/ +s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = 0; + + if (!this_client) + return E1000_ERR_I2C; + + swfw_mask = E1000_SWFW_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) + return E1000_ERR_SWFW_SYNC; + + status = i2c_smbus_read_byte_data(this_client, byte_offset); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status < 0) + return E1000_ERR_I2C; + else { + *data = status; + return 0; + } +} + +/** + * igb_write_i2c_byte - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: value to write + * + * Performs byte write operation over I2C interface at + * a specified device address. + **/ +s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = E1000_SWFW_PHY0_SM; + + if (!this_client) + return E1000_ERR_I2C; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask)) + return E1000_ERR_SWFW_SYNC; + status = i2c_smbus_write_byte_data(this_client, byte_offset, data); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status) + return E1000_ERR_I2C; + else + return 0; + +} + +int igb_reinit_queues(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + if (netif_running(netdev)) + igb_close(netdev); + + igb_reset_interrupt_capability(adapter); + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + if (netif_running(netdev)) + err = igb_open(netdev); + + return err; +} /* igb_main.c */ |
