diff options
Diffstat (limited to 'drivers/net/ethernet')
147 files changed, 10165 insertions, 4506 deletions
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index b1536663514..bb9670f29b5 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -364,7 +364,7 @@ typhoon_inc_rxfree_index(u32 *index, const int count) static inline void typhoon_inc_tx_index(u32 *index, const int count) { - /* if we start using the Hi Tx ring, this needs updateing */ + /* if we start using the Hi Tx ring, this needs updating */ typhoon_inc_index(index, count, TXLO_ENTRIES); } diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index a11af5cc484..e4ff3894911 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -89,15 +89,6 @@ source "drivers/net/ethernet/marvell/Kconfig" source "drivers/net/ethernet/mellanox/Kconfig" source "drivers/net/ethernet/micrel/Kconfig" source "drivers/net/ethernet/microchip/Kconfig" - -config MIPS_SIM_NET - tristate "MIPS simulator Network device" - depends on MIPS_SIM - ---help--- - The MIPSNET device is a simple Ethernet network device which is - emulated by the MIPS Simulator. - If you are not using a MIPSsim or are unsure, say N. - source "drivers/net/ethernet/myricom/Kconfig" config FEALNX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 878ad32b93f..d4473072654 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -40,7 +40,6 @@ obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/ obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/ obj-$(CONFIG_NET_VENDOR_MICREL) += micrel/ obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/ -obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 1bf5bbfe778..55a2e379505 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -149,7 +149,7 @@ static void atl1c_reset_pcie(struct atl1c_hw *hw, u32 flag) data &= ~(PCI_ERR_UNC_DLP | PCI_ERR_UNC_FCP); pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, data); /* clear error status */ - pci_write_config_word(pdev, pci_pcie_cap(pdev) + PCI_EXP_DEVSTA, + pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_NFED | PCI_EXP_DEVSTA_FED | PCI_EXP_DEVSTA_CED | @@ -2685,7 +2685,7 @@ static void atl1c_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); } -static struct pci_error_handlers atl1c_err_handler = { +static const struct pci_error_handlers atl1c_err_handler = { .error_detected = atl1c_io_error_detected, .slot_reset = atl1c_io_slot_reset, .resume = atl1c_io_resume, diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index a98acc8a956..e213da29e73 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -2489,7 +2489,7 @@ static void atl1e_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); } -static struct pci_error_handlers atl1e_err_handler = { +static const struct pci_error_handlers atl1e_err_handler = { .error_detected = atl1e_io_error_detected, .slot_reset = atl1e_io_slot_reset, .resume = atl1e_io_resume, diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index f15e72e81ac..4bd416b72e6 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -101,6 +101,7 @@ config TIGON3 tristate "Broadcom Tigon3 support" depends on PCI select PHYLIB + select HWMON ---help--- This driver supports Broadcom Tigon3 based gigabit Ethernet cards. diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index e48312f2305..d4310700c7a 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -8742,7 +8742,7 @@ static void bnx2_io_resume(struct pci_dev *pdev) rtnl_unlock(); } -static struct pci_error_handlers bnx2_err_handler = { +static const struct pci_error_handlers bnx2_err_handler = { .error_detected = bnx2_io_error_detected, .slot_reset = bnx2_io_slot_reset, .resume = bnx2_io_resume, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 6d1a24acb77..72897c47b8c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -23,8 +23,8 @@ * (you will need to reboot afterwards) */ /* #define BNX2X_STOP_ON_ERROR */ -#define DRV_MODULE_VERSION "1.72.51-0" -#define DRV_MODULE_RELDATE "2012/06/18" +#define DRV_MODULE_VERSION "1.78.00-0" +#define DRV_MODULE_RELDATE "2012/09/27" #define BNX2X_BC_VER 0x040200 #if defined(CONFIG_DCB) @@ -1458,7 +1458,7 @@ struct bnx2x { int fw_stats_req_sz; /* - * FW statistics data shortcut (points at the begining of + * FW statistics data shortcut (points at the beginning of * fw_stats buffer + fw_stats_req_sz). */ struct bnx2x_fw_stats_data *fw_stats_data; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e8e97a7d1d0..30f04a38922 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2285,7 +2285,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* Wait for all pending SP commands to complete */ if (!bnx2x_wait_sp_comp(bp, ~0x0UL)) { BNX2X_ERR("Timeout waiting for SP elements to complete\n"); - bnx2x_nic_unload(bp, UNLOAD_CLOSE); + bnx2x_nic_unload(bp, UNLOAD_CLOSE, false); return -EBUSY; } @@ -2333,7 +2333,7 @@ load_error0: } /* must be called with rtnl_lock */ -int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) +int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) { int i; bool global = false; @@ -2395,7 +2395,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) /* Cleanup the chip if needed */ if (unload_mode != UNLOAD_RECOVERY) - bnx2x_chip_cleanup(bp, unload_mode); + bnx2x_chip_cleanup(bp, unload_mode, keep_link); else { /* Send the UNLOAD_REQUEST to the MCP */ bnx2x_send_unload_req(bp, unload_mode); @@ -2419,7 +2419,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) bnx2x_free_irq(bp); /* Report UNLOAD_DONE to MCP */ - bnx2x_send_unload_done(bp); + bnx2x_send_unload_done(bp, false); } /* @@ -3026,8 +3026,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) first_bd = tx_start_bd; tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_ETH_ADDR_TYPE, - mac_type); + SET_FLAG(tx_start_bd->general_data, + ETH_TX_START_BD_PARSE_NBDS, + 0); /* header nbd */ SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1); @@ -3077,13 +3078,20 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) &pbd_e2->dst_mac_addr_lo, eth->h_dest); } + + SET_FLAG(pbd_e2_parsing_data, + ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type); } else { + u16 global_data = 0; pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x; memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); /* Set PBD in checksum offload case */ if (xmit_type & XMIT_CSUM) hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type); + SET_FLAG(global_data, + ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, mac_type); + pbd_e1x->global_data |= cpu_to_le16(global_data); } /* Setup the data pointer of the first BD of the packet */ @@ -3770,7 +3778,7 @@ int bnx2x_reload_if_running(struct net_device *dev) if (unlikely(!netif_running(dev))) return 0; - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); return bnx2x_nic_load(bp, LOAD_NORMAL); } @@ -3967,7 +3975,7 @@ int bnx2x_suspend(struct pci_dev *pdev, pm_message_t state) netif_device_detach(dev); - bnx2x_nic_unload(bp, UNLOAD_CLOSE); + bnx2x_nic_unload(bp, UNLOAD_CLOSE, false); bnx2x_set_power_state(bp, pci_choose_state(pdev, state)); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index dfd86a55f1d..9c5ea6c5b4c 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -83,8 +83,9 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode); * bnx2x_send_unload_done - send UNLOAD_DONE command to the MCP. * * @bp: driver handle + * @keep_link: true iff link should be kept up */ -void bnx2x_send_unload_done(struct bnx2x *bp); +void bnx2x_send_unload_done(struct bnx2x *bp, bool keep_link); /** * bnx2x_config_rss_pf - configure RSS parameters in a PF. @@ -153,6 +154,14 @@ u8 bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode); void bnx2x_link_set(struct bnx2x *bp); /** + * bnx2x_force_link_reset - Forces link reset, and put the PHY + * in reset as well. + * + * @bp: driver handle + */ +void bnx2x_force_link_reset(struct bnx2x *bp); + +/** * bnx2x_link_test - query link status. * * @bp: driver handle @@ -312,12 +321,13 @@ void bnx2x_set_num_queues(struct bnx2x *bp); * * @bp: driver handle * @unload_mode: COMMON, PORT, FUNCTION + * @keep_link: true iff link should be kept up. * * - Cleanup MAC configuration. * - Closes clients. * - etc. */ -void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode); +void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link); /** * bnx2x_acquire_hw_lock - acquire HW lock. @@ -446,7 +456,7 @@ void bnx2x_fw_dump_lvl(struct bnx2x *bp, const char *lvl); bool bnx2x_test_firmware_version(struct bnx2x *bp, bool is_err); /* dev_close main block */ -int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode); +int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link); /* dev_open main block */ int bnx2x_nic_load(struct bnx2x *bp, int load_mode); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 8a73374e52a..2245c389540 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -91,25 +91,21 @@ static void bnx2x_pfc_set(struct bnx2x *bp) /* * Rx COS configuration * Changing PFC RX configuration . - * In RX COS0 will always be configured to lossy and COS1 to lossless + * In RX COS0 will always be configured to lossless and COS1 to lossy */ for (i = 0 ; i < MAX_PFC_PRIORITIES ; i++) { pri_bit = 1 << i; - if (pri_bit & DCBX_PFC_PRI_PAUSE_MASK(bp)) + if (!(pri_bit & DCBX_PFC_PRI_PAUSE_MASK(bp))) val |= 1 << (i * 4); } pfc_params.pkt_priority_to_cos = val; /* RX COS0 */ - pfc_params.llfc_low_priority_classes = 0; + pfc_params.llfc_low_priority_classes = DCBX_PFC_PRI_PAUSE_MASK(bp); /* RX COS1 */ - pfc_params.llfc_high_priority_classes = DCBX_PFC_PRI_PAUSE_MASK(bp); - - /* BRB configuration */ - pfc_params.cos0_pauseable = false; - pfc_params.cos1_pauseable = true; + pfc_params.llfc_high_priority_classes = 0; bnx2x_acquire_phy_lock(bp); bp->link_params.feature_config_flags |= FEATURE_CONFIG_PFC_ENABLED; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index ebf40cd7aa1..c65295dded3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -905,6 +905,7 @@ static int bnx2x_nway_reset(struct net_device *dev) if (netif_running(dev)) { bnx2x_stats_handle(bp, STATS_EVENT_STOP); + bnx2x_force_link_reset(bp); bnx2x_link_set(bp); } @@ -1606,7 +1607,7 @@ static int bnx2x_set_pauseparam(struct net_device *dev, return 0; } -static char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = { +static const char bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF][ETH_GSTRING_LEN] = { "register_test (offline) ", "memory_test (offline) ", "int_loopback_test (offline)", @@ -1653,7 +1654,7 @@ static int bnx2x_get_eee(struct net_device *dev, struct ethtool_eee *edata) return -EOPNOTSUPP; } - eee_cfg = SHMEM2_RD(bp, eee_status[BP_PORT(bp)]); + eee_cfg = bp->link_vars.eee_status; edata->supported = bnx2x_eee_to_adv((eee_cfg & SHMEM_EEE_SUPPORTED_MASK) >> @@ -1690,7 +1691,7 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata) return -EOPNOTSUPP; } - eee_cfg = SHMEM2_RD(bp, eee_status[BP_PORT(bp)]); + eee_cfg = bp->link_vars.eee_status; if (!(eee_cfg & SHMEM_EEE_SUPPORTED_MASK)) { DP(BNX2X_MSG_ETHTOOL, "Board does not support EEE!\n"); @@ -1739,6 +1740,7 @@ static int bnx2x_set_eee(struct net_device *dev, struct ethtool_eee *edata) /* Restart link to propogate changes */ if (netif_running(dev)) { bnx2x_stats_handle(bp, STATS_EVENT_STOP); + bnx2x_force_link_reset(bp); bnx2x_link_set(bp); } @@ -2038,8 +2040,6 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) u16 pkt_prod, bd_prod; struct sw_tx_bd *tx_buf; struct eth_tx_start_bd *tx_start_bd; - struct eth_tx_parse_bd_e1x *pbd_e1x = NULL; - struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; dma_addr_t mapping; union eth_rx_cqe *cqe; u8 cqe_fp_flags, cqe_fp_type; @@ -2131,21 +2131,32 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod); tx_start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; SET_FLAG(tx_start_bd->general_data, - ETH_TX_START_BD_ETH_ADDR_TYPE, - UNICAST_ADDRESS); - SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_HDR_NBDS, 1); + SET_FLAG(tx_start_bd->general_data, + ETH_TX_START_BD_PARSE_NBDS, + 0); /* turn on parsing and get a BD */ bd_prod = TX_BD(NEXT_TX_IDX(bd_prod)); - pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x; - pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2; - - memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); - memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); - + if (CHIP_IS_E1x(bp)) { + u16 global_data = 0; + struct eth_tx_parse_bd_e1x *pbd_e1x = + &txdata->tx_desc_ring[bd_prod].parse_bd_e1x; + memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); + SET_FLAG(global_data, + ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE, UNICAST_ADDRESS); + pbd_e1x->global_data = cpu_to_le16(global_data); + } else { + u32 parsing_data = 0; + struct eth_tx_parse_bd_e2 *pbd_e2 = + &txdata->tx_desc_ring[bd_prod].parse_bd_e2; + memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2)); + SET_FLAG(parsing_data, + ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, UNICAST_ADDRESS); + pbd_e2->parsing_data = cpu_to_le32(parsing_data); + } wmb(); txdata->tx_db.data.prod += 2; @@ -2263,7 +2274,7 @@ static int bnx2x_test_ext_loopback(struct bnx2x *bp) if (!netif_running(bp->dev)) return BNX2X_EXT_LOOPBACK_FAILED; - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, false); rc = bnx2x_nic_load(bp, LOAD_LOOPBACK_EXT); if (rc) { DP(BNX2X_MSG_ETHTOOL, @@ -2414,7 +2425,7 @@ static void bnx2x_self_test(struct net_device *dev, link_up = bp->link_vars.link_up; - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, false); rc = bnx2x_nic_load(bp, LOAD_DIAG); if (rc) { etest->flags |= ETH_TEST_FL_FAILED; @@ -2446,7 +2457,7 @@ static void bnx2x_self_test(struct net_device *dev, etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE; } - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, false); /* restore input for TX port IF */ REG_WR(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4, val); @@ -2534,7 +2545,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct bnx2x *bp = netdev_priv(dev); - int i, j, k, offset, start; + int i, j, k, start; char queue_name[MAX_QUEUE_NAME_LEN+1]; switch (stringset) { @@ -2570,13 +2581,8 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) start = 0; else start = 4; - for (i = 0, j = start; j < (start + BNX2X_NUM_TESTS(bp)); - i++, j++) { - offset = sprintf(buf+32*i, "%s", - bnx2x_tests_str_arr[j]); - *(buf+offset) = '\0'; - } - break; + memcpy(buf, bnx2x_tests_str_arr + start, + ETH_GSTRING_LEN * BNX2X_NUM_TESTS(bp)); } } @@ -2940,7 +2946,7 @@ static int bnx2x_set_channels(struct net_device *dev, bnx2x_change_num_queues(bp, channels->combined_count); return 0; } - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); bnx2x_change_num_queues(bp, channels->combined_count); return bnx2x_nic_load(bp, LOAD_NORMAL); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h index bbc66ced9c2..620fe939ecf 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h @@ -88,9 +88,6 @@ #define TSTORM_ASSERT_LIST_INDEX_OFFSET (IRO[102].base) #define TSTORM_ASSERT_LIST_OFFSET(assertListEntry) \ (IRO[101].base + ((assertListEntry) * IRO[101].m1)) -#define TSTORM_COMMON_SAFC_WORKAROUND_ENABLE_OFFSET (IRO[107].base) -#define TSTORM_COMMON_SAFC_WORKAROUND_TIMEOUT_10USEC_OFFSET \ - (IRO[108].base) #define TSTORM_FUNCTION_COMMON_CONFIG_OFFSET(pfId) \ (IRO[201].base + ((pfId) * IRO[201].m1)) #define TSTORM_FUNC_EN_OFFSET(funcId) \ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 76b6e65790f..18704929e64 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -1286,6 +1286,9 @@ struct drv_func_mb { #define DRV_MSG_CODE_SET_MF_BW_MIN_MASK 0x00ff0000 #define DRV_MSG_CODE_SET_MF_BW_MAX_MASK 0xff000000 + #define DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET 0x00000002 + + #define DRV_MSG_CODE_LOAD_REQ_WITH_LFA 0x0000100a u32 fw_mb_header; #define FW_MSG_CODE_MASK 0xffff0000 #define FW_MSG_CODE_DRV_LOAD_COMMON 0x10100000 @@ -1909,6 +1912,54 @@ struct lldp_local_mib { }; /***END OF DCBX STRUCTURES DECLARATIONS***/ +/***********************************************************/ +/* Elink section */ +/***********************************************************/ +#define SHMEM_LINK_CONFIG_SIZE 2 +struct shmem_lfa { + u32 req_duplex; + #define REQ_DUPLEX_PHY0_MASK 0x0000ffff + #define REQ_DUPLEX_PHY0_SHIFT 0 + #define REQ_DUPLEX_PHY1_MASK 0xffff0000 + #define REQ_DUPLEX_PHY1_SHIFT 16 + u32 req_flow_ctrl; + #define REQ_FLOW_CTRL_PHY0_MASK 0x0000ffff + #define REQ_FLOW_CTRL_PHY0_SHIFT 0 + #define REQ_FLOW_CTRL_PHY1_MASK 0xffff0000 + #define REQ_FLOW_CTRL_PHY1_SHIFT 16 + u32 req_line_speed; /* Also determine AutoNeg */ + #define REQ_LINE_SPD_PHY0_MASK 0x0000ffff + #define REQ_LINE_SPD_PHY0_SHIFT 0 + #define REQ_LINE_SPD_PHY1_MASK 0xffff0000 + #define REQ_LINE_SPD_PHY1_SHIFT 16 + u32 speed_cap_mask[SHMEM_LINK_CONFIG_SIZE]; + u32 additional_config; + #define REQ_FC_AUTO_ADV_MASK 0x0000ffff + #define REQ_FC_AUTO_ADV0_SHIFT 0 + #define NO_LFA_DUE_TO_DCC_MASK 0x00010000 + u32 lfa_sts; + #define LFA_LINK_FLAP_REASON_OFFSET 0 + #define LFA_LINK_FLAP_REASON_MASK 0x000000ff + #define LFA_LINK_DOWN 0x1 + #define LFA_LOOPBACK_ENABLED 0x2 + #define LFA_DUPLEX_MISMATCH 0x3 + #define LFA_MFW_IS_TOO_OLD 0x4 + #define LFA_LINK_SPEED_MISMATCH 0x5 + #define LFA_FLOW_CTRL_MISMATCH 0x6 + #define LFA_SPEED_CAP_MISMATCH 0x7 + #define LFA_DCC_LFA_DISABLED 0x8 + #define LFA_EEE_MISMATCH 0x9 + + #define LINK_FLAP_AVOIDANCE_COUNT_OFFSET 8 + #define LINK_FLAP_AVOIDANCE_COUNT_MASK 0x0000ff00 + + #define LINK_FLAP_COUNT_OFFSET 16 + #define LINK_FLAP_COUNT_MASK 0x00ff0000 + + #define LFA_FLAGS_MASK 0xff000000 + #define SHMEM_LFA_DONT_CLEAR_STAT (1<<24) +}; + struct ncsi_oem_fcoe_features { u32 fcoe_features1; #define FCOE_FEATURES1_IOS_PER_CONNECTION_MASK 0x0000FFFF @@ -2738,8 +2789,8 @@ struct afex_stats { }; #define BCM_5710_FW_MAJOR_VERSION 7 -#define BCM_5710_FW_MINOR_VERSION 2 -#define BCM_5710_FW_REVISION_VERSION 51 +#define BCM_5710_FW_MINOR_VERSION 8 +#define BCM_5710_FW_REVISION_VERSION 2 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 @@ -3861,10 +3912,8 @@ struct eth_rss_update_ramrod_data { #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_TCP_CAPABILITY_SHIFT 4 #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY (0x1<<5) #define ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY_SHIFT 5 -#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY (0x1<<6) -#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY_SHIFT 6 -#define __ETH_RSS_UPDATE_RAMROD_DATA_RESERVED0 (0x1<<7) -#define __ETH_RSS_UPDATE_RAMROD_DATA_RESERVED0_SHIFT 7 +#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY (0x1<<7) +#define ETH_RSS_UPDATE_RAMROD_DATA_UPDATE_RSS_KEY_SHIFT 7 u8 rss_result_mask; u8 rss_mode; __le32 __reserved2; @@ -4080,27 +4129,29 @@ struct eth_tx_start_bd { #define ETH_TX_START_BD_HDR_NBDS_SHIFT 0 #define ETH_TX_START_BD_FORCE_VLAN_MODE (0x1<<4) #define ETH_TX_START_BD_FORCE_VLAN_MODE_SHIFT 4 -#define ETH_TX_START_BD_RESREVED (0x1<<5) -#define ETH_TX_START_BD_RESREVED_SHIFT 5 -#define ETH_TX_START_BD_ETH_ADDR_TYPE (0x3<<6) -#define ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT 6 +#define ETH_TX_START_BD_PARSE_NBDS (0x3<<5) +#define ETH_TX_START_BD_PARSE_NBDS_SHIFT 5 +#define ETH_TX_START_BD_RESREVED (0x1<<7) +#define ETH_TX_START_BD_RESREVED_SHIFT 7 }; /* * Tx parsing BD structure for ETH E1/E1h */ struct eth_tx_parse_bd_e1x { - u8 global_data; + __le16 global_data; #define ETH_TX_PARSE_BD_E1X_IP_HDR_START_OFFSET_W (0xF<<0) #define ETH_TX_PARSE_BD_E1X_IP_HDR_START_OFFSET_W_SHIFT 0 -#define ETH_TX_PARSE_BD_E1X_RESERVED0 (0x1<<4) -#define ETH_TX_PARSE_BD_E1X_RESERVED0_SHIFT 4 -#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN (0x1<<5) -#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN_SHIFT 5 -#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN (0x1<<6) -#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT 6 -#define ETH_TX_PARSE_BD_E1X_NS_FLG (0x1<<7) -#define ETH_TX_PARSE_BD_E1X_NS_FLG_SHIFT 7 +#define ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE (0x3<<4) +#define ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE_SHIFT 4 +#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN (0x1<<6) +#define ETH_TX_PARSE_BD_E1X_PSEUDO_CS_WITHOUT_LEN_SHIFT 6 +#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN (0x1<<7) +#define ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT 7 +#define ETH_TX_PARSE_BD_E1X_NS_FLG (0x1<<8) +#define ETH_TX_PARSE_BD_E1X_NS_FLG_SHIFT 8 +#define ETH_TX_PARSE_BD_E1X_RESERVED0 (0x7F<<9) +#define ETH_TX_PARSE_BD_E1X_RESERVED0_SHIFT 9 u8 tcp_flags; #define ETH_TX_PARSE_BD_E1X_FIN_FLG (0x1<<0) #define ETH_TX_PARSE_BD_E1X_FIN_FLG_SHIFT 0 @@ -4119,7 +4170,6 @@ struct eth_tx_parse_bd_e1x { #define ETH_TX_PARSE_BD_E1X_CWR_FLG (0x1<<7) #define ETH_TX_PARSE_BD_E1X_CWR_FLG_SHIFT 7 u8 ip_hlen_w; - s8 reserved; __le16 total_hlen_w; __le16 tcp_pseudo_csum; __le16 lso_mss; @@ -4138,14 +4188,16 @@ struct eth_tx_parse_bd_e2 { __le16 src_mac_addr_mid; __le16 src_mac_addr_hi; __le32 parsing_data; -#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W (0x1FFF<<0) +#define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W (0x7FF<<0) #define ETH_TX_PARSE_BD_E2_TCP_HDR_START_OFFSET_W_SHIFT 0 -#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW (0xF<<13) -#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT 13 -#define ETH_TX_PARSE_BD_E2_LSO_MSS (0x3FFF<<17) -#define ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT 17 -#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR (0x1<<31) -#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR_SHIFT 31 +#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW (0xF<<11) +#define ETH_TX_PARSE_BD_E2_TCP_HDR_LENGTH_DW_SHIFT 11 +#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR (0x1<<15) +#define ETH_TX_PARSE_BD_E2_IPV6_WITH_EXT_HDR_SHIFT 15 +#define ETH_TX_PARSE_BD_E2_LSO_MSS (0x3FFF<<16) +#define ETH_TX_PARSE_BD_E2_LSO_MSS_SHIFT 16 +#define ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE (0x3<<30) +#define ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT 30 }; /* @@ -4913,7 +4965,8 @@ struct flow_control_configuration { * */ struct function_start_data { - __le16 function_mode; + u8 function_mode; + u8 reserved; __le16 sd_vlan_tag; __le16 vif_id; u8 path_id; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h index 559c396d45c..c8f10f0e8a0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init.h @@ -566,7 +566,7 @@ static const struct { u32 e2; /* 57712 */ u32 e3; /* 578xx */ } reg_mask; /* Register mask (all valid bits) */ - char name[7]; /* Block's longest name is 6 characters long + char name[8]; /* Block's longest name is 7 characters long * (name + suffix) */ } bnx2x_blocks_parity_data[] = { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index b046beb435b..e2e45ee5df3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -161,120 +161,6 @@ #define EDC_MODE_LIMITING 0x0044 #define EDC_MODE_PASSIVE_DAC 0x0055 -/* BRB default for class 0 E2 */ -#define DEFAULT0_E2_BRB_MAC_PAUSE_XOFF_THR 170 -#define DEFAULT0_E2_BRB_MAC_PAUSE_XON_THR 250 -#define DEFAULT0_E2_BRB_MAC_FULL_XOFF_THR 10 -#define DEFAULT0_E2_BRB_MAC_FULL_XON_THR 50 - -/* BRB thresholds for E2*/ -#define PFC_E2_BRB_MAC_PAUSE_XOFF_THR_PAUSE 170 -#define PFC_E2_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE 0 - -#define PFC_E2_BRB_MAC_PAUSE_XON_THR_PAUSE 250 -#define PFC_E2_BRB_MAC_PAUSE_XON_THR_NON_PAUSE 0 - -#define PFC_E2_BRB_MAC_FULL_XOFF_THR_PAUSE 10 -#define PFC_E2_BRB_MAC_FULL_XOFF_THR_NON_PAUSE 90 - -#define PFC_E2_BRB_MAC_FULL_XON_THR_PAUSE 50 -#define PFC_E2_BRB_MAC_FULL_XON_THR_NON_PAUSE 250 - -/* BRB default for class 0 E3A0 */ -#define DEFAULT0_E3A0_BRB_MAC_PAUSE_XOFF_THR 290 -#define DEFAULT0_E3A0_BRB_MAC_PAUSE_XON_THR 410 -#define DEFAULT0_E3A0_BRB_MAC_FULL_XOFF_THR 10 -#define DEFAULT0_E3A0_BRB_MAC_FULL_XON_THR 50 - -/* BRB thresholds for E3A0 */ -#define PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_PAUSE 290 -#define PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE 0 - -#define PFC_E3A0_BRB_MAC_PAUSE_XON_THR_PAUSE 410 -#define PFC_E3A0_BRB_MAC_PAUSE_XON_THR_NON_PAUSE 0 - -#define PFC_E3A0_BRB_MAC_FULL_XOFF_THR_PAUSE 10 -#define PFC_E3A0_BRB_MAC_FULL_XOFF_THR_NON_PAUSE 170 - -#define PFC_E3A0_BRB_MAC_FULL_XON_THR_PAUSE 50 -#define PFC_E3A0_BRB_MAC_FULL_XON_THR_NON_PAUSE 410 - -/* BRB default for E3B0 */ -#define DEFAULT0_E3B0_BRB_MAC_PAUSE_XOFF_THR 330 -#define DEFAULT0_E3B0_BRB_MAC_PAUSE_XON_THR 490 -#define DEFAULT0_E3B0_BRB_MAC_FULL_XOFF_THR 15 -#define DEFAULT0_E3B0_BRB_MAC_FULL_XON_THR 55 - -/* BRB thresholds for E3B0 2 port mode*/ -#define PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_PAUSE 1025 -#define PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE 0 - -#define PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_PAUSE 1025 -#define PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE 0 - -#define PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_PAUSE 10 -#define PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE 1025 - -#define PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_PAUSE 50 -#define PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_NON_PAUSE 1025 - -/* only for E3B0*/ -#define PFC_E3B0_2P_BRB_FULL_LB_XOFF_THR 1025 -#define PFC_E3B0_2P_BRB_FULL_LB_XON_THR 1025 - -/* Lossy +Lossless GUARANTIED == GUART */ -#define PFC_E3B0_2P_MIX_PAUSE_LB_GUART 284 -/* Lossless +Lossless*/ -#define PFC_E3B0_2P_PAUSE_LB_GUART 236 -/* Lossy +Lossy*/ -#define PFC_E3B0_2P_NON_PAUSE_LB_GUART 342 - -/* Lossy +Lossless*/ -#define PFC_E3B0_2P_MIX_PAUSE_MAC_0_CLASS_T_GUART 284 -/* Lossless +Lossless*/ -#define PFC_E3B0_2P_PAUSE_MAC_0_CLASS_T_GUART 236 -/* Lossy +Lossy*/ -#define PFC_E3B0_2P_NON_PAUSE_MAC_0_CLASS_T_GUART 336 -#define PFC_E3B0_2P_BRB_MAC_0_CLASS_T_GUART_HYST 80 - -#define PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART 0 -#define PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART_HYST 0 - -/* BRB thresholds for E3B0 4 port mode */ -#define PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_PAUSE 304 -#define PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE 0 - -#define PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_PAUSE 384 -#define PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE 0 - -#define PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_PAUSE 10 -#define PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE 304 - -#define PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_PAUSE 50 -#define PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_NON_PAUSE 384 - -/* only for E3B0*/ -#define PFC_E3B0_4P_BRB_FULL_LB_XOFF_THR 304 -#define PFC_E3B0_4P_BRB_FULL_LB_XON_THR 384 -#define PFC_E3B0_4P_LB_GUART 120 - -#define PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART 120 -#define PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART_HYST 80 - -#define PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART 80 -#define PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART_HYST 120 - -/* Pause defines*/ -#define DEFAULT_E3B0_BRB_FULL_LB_XOFF_THR 330 -#define DEFAULT_E3B0_BRB_FULL_LB_XON_THR 490 -#define DEFAULT_E3B0_LB_GUART 40 - -#define DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART 40 -#define DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART_HYST 0 - -#define DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART 40 -#define DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART_HYST 0 - /* ETS defines*/ #define DCBX_INVALID_COS (0xFF) @@ -321,6 +207,127 @@ static u32 bnx2x_bits_dis(struct bnx2x *bp, u32 reg, u32 bits) return val; } +/* + * bnx2x_check_lfa - This function checks if link reinitialization is required, + * or link flap can be avoided. + * + * @params: link parameters + * Returns 0 if Link Flap Avoidance conditions are met otherwise, the failed + * condition code. + */ +static int bnx2x_check_lfa(struct link_params *params) +{ + u32 link_status, cfg_idx, lfa_mask, cfg_size; + u32 cur_speed_cap_mask, cur_req_fc_auto_adv, additional_config; + u32 saved_val, req_val, eee_status; + struct bnx2x *bp = params->bp; + + additional_config = + REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, additional_config)); + + /* NOTE: must be first condition checked - + * to verify DCC bit is cleared in any case! + */ + if (additional_config & NO_LFA_DUE_TO_DCC_MASK) { + DP(NETIF_MSG_LINK, "No LFA due to DCC flap after clp exit\n"); + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, additional_config), + additional_config & ~NO_LFA_DUE_TO_DCC_MASK); + return LFA_DCC_LFA_DISABLED; + } + + /* Verify that link is up */ + link_status = REG_RD(bp, params->shmem_base + + offsetof(struct shmem_region, + port_mb[params->port].link_status)); + if (!(link_status & LINK_STATUS_LINK_UP)) + return LFA_LINK_DOWN; + + /* Verify that loopback mode is not set */ + if (params->loopback_mode) + return LFA_LOOPBACK_ENABLED; + + /* Verify that MFW supports LFA */ + if (!params->lfa_base) + return LFA_MFW_IS_TOO_OLD; + + if (params->num_phys == 3) { + cfg_size = 2; + lfa_mask = 0xffffffff; + } else { + cfg_size = 1; + lfa_mask = 0xffff; + } + + /* Compare Duplex */ + saved_val = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_duplex)); + req_val = params->req_duplex[0] | (params->req_duplex[1] << 16); + if ((saved_val & lfa_mask) != (req_val & lfa_mask)) { + DP(NETIF_MSG_LINK, "Duplex mismatch %x vs. %x\n", + (saved_val & lfa_mask), (req_val & lfa_mask)); + return LFA_DUPLEX_MISMATCH; + } + /* Compare Flow Control */ + saved_val = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_flow_ctrl)); + req_val = params->req_flow_ctrl[0] | (params->req_flow_ctrl[1] << 16); + if ((saved_val & lfa_mask) != (req_val & lfa_mask)) { + DP(NETIF_MSG_LINK, "Flow control mismatch %x vs. %x\n", + (saved_val & lfa_mask), (req_val & lfa_mask)); + return LFA_FLOW_CTRL_MISMATCH; + } + /* Compare Link Speed */ + saved_val = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_line_speed)); + req_val = params->req_line_speed[0] | (params->req_line_speed[1] << 16); + if ((saved_val & lfa_mask) != (req_val & lfa_mask)) { + DP(NETIF_MSG_LINK, "Link speed mismatch %x vs. %x\n", + (saved_val & lfa_mask), (req_val & lfa_mask)); + return LFA_LINK_SPEED_MISMATCH; + } + + for (cfg_idx = 0; cfg_idx < cfg_size; cfg_idx++) { + cur_speed_cap_mask = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, + speed_cap_mask[cfg_idx])); + + if (cur_speed_cap_mask != params->speed_cap_mask[cfg_idx]) { + DP(NETIF_MSG_LINK, "Speed Cap mismatch %x vs. %x\n", + cur_speed_cap_mask, + params->speed_cap_mask[cfg_idx]); + return LFA_SPEED_CAP_MISMATCH; + } + } + + cur_req_fc_auto_adv = + REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, additional_config)) & + REQ_FC_AUTO_ADV_MASK; + + if ((u16)cur_req_fc_auto_adv != params->req_fc_auto_adv) { + DP(NETIF_MSG_LINK, "Flow Ctrl AN mismatch %x vs. %x\n", + cur_req_fc_auto_adv, params->req_fc_auto_adv); + return LFA_FLOW_CTRL_MISMATCH; + } + + eee_status = REG_RD(bp, params->shmem2_base + + offsetof(struct shmem2_region, + eee_status[params->port])); + + if (((eee_status & SHMEM_EEE_LPI_REQUESTED_BIT) ^ + (params->eee_mode & EEE_MODE_ENABLE_LPI)) || + ((eee_status & SHMEM_EEE_REQUESTED_BIT) ^ + (params->eee_mode & EEE_MODE_ADV_LPI))) { + DP(NETIF_MSG_LINK, "EEE mismatch %x vs. %x\n", params->eee_mode, + eee_status); + return LFA_EEE_MISMATCH; + } + + /* LFA conditions are met */ + return 0; +} /******************************************************************/ /* EPIO/GPIO section */ /******************************************************************/ @@ -1307,93 +1314,6 @@ int bnx2x_ets_strict(const struct link_params *params, const u8 strict_cos) } /******************************************************************/ -/* EEE section */ -/******************************************************************/ -static u8 bnx2x_eee_has_cap(struct link_params *params) -{ - struct bnx2x *bp = params->bp; - - if (REG_RD(bp, params->shmem2_base) <= - offsetof(struct shmem2_region, eee_status[params->port])) - return 0; - - return 1; -} - -static int bnx2x_eee_nvram_to_time(u32 nvram_mode, u32 *idle_timer) -{ - switch (nvram_mode) { - case PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED: - *idle_timer = EEE_MODE_NVRAM_BALANCED_TIME; - break; - case PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE: - *idle_timer = EEE_MODE_NVRAM_AGGRESSIVE_TIME; - break; - case PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY: - *idle_timer = EEE_MODE_NVRAM_LATENCY_TIME; - break; - default: - *idle_timer = 0; - break; - } - - return 0; -} - -static int bnx2x_eee_time_to_nvram(u32 idle_timer, u32 *nvram_mode) -{ - switch (idle_timer) { - case EEE_MODE_NVRAM_BALANCED_TIME: - *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED; - break; - case EEE_MODE_NVRAM_AGGRESSIVE_TIME: - *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE; - break; - case EEE_MODE_NVRAM_LATENCY_TIME: - *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY; - break; - default: - *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED; - break; - } - - return 0; -} - -static u32 bnx2x_eee_calc_timer(struct link_params *params) -{ - u32 eee_mode, eee_idle; - struct bnx2x *bp = params->bp; - - if (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) { - if (params->eee_mode & EEE_MODE_OUTPUT_TIME) { - /* time value in eee_mode --> used directly*/ - eee_idle = params->eee_mode & EEE_MODE_TIMER_MASK; - } else { - /* hsi value in eee_mode --> time */ - if (bnx2x_eee_nvram_to_time(params->eee_mode & - EEE_MODE_NVRAM_MASK, - &eee_idle)) - return 0; - } - } else { - /* hsi values in nvram --> time*/ - eee_mode = ((REG_RD(bp, params->shmem_base + - offsetof(struct shmem_region, dev_info. - port_feature_config[params->port]. - eee_power_mode)) & - PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> - PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); - - if (bnx2x_eee_nvram_to_time(eee_mode, &eee_idle)) - return 0; - } - - return eee_idle; -} - - -/******************************************************************/ /* PFC section */ /******************************************************************/ static void bnx2x_update_pfc_xmac(struct link_params *params, @@ -1606,16 +1526,23 @@ static void bnx2x_set_xumac_nig(struct link_params *params, NIG_REG_P0_MAC_PAUSE_OUT_EN, tx_pause_en); } -static void bnx2x_umac_disable(struct link_params *params) +static void bnx2x_set_umac_rxtx(struct link_params *params, u8 en) { u32 umac_base = params->port ? GRCBASE_UMAC1 : GRCBASE_UMAC0; + u32 val; struct bnx2x *bp = params->bp; if (!(REG_RD(bp, MISC_REG_RESET_REG_2) & (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port))) return; - + val = REG_RD(bp, umac_base + UMAC_REG_COMMAND_CONFIG); + if (en) + val |= (UMAC_COMMAND_CONFIG_REG_TX_ENA | + UMAC_COMMAND_CONFIG_REG_RX_ENA); + else + val &= ~(UMAC_COMMAND_CONFIG_REG_TX_ENA | + UMAC_COMMAND_CONFIG_REG_RX_ENA); /* Disable RX and TX */ - REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, 0); + REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val); } static void bnx2x_umac_enable(struct link_params *params, @@ -1671,6 +1598,16 @@ static void bnx2x_umac_enable(struct link_params *params, REG_WR(bp, umac_base + UMAC_REG_COMMAND_CONFIG, val); udelay(50); + /* Configure UMAC for EEE */ + if (vars->eee_status & SHMEM_EEE_ADV_STATUS_MASK) { + DP(NETIF_MSG_LINK, "configured UMAC for EEE\n"); + REG_WR(bp, umac_base + UMAC_REG_UMAC_EEE_CTRL, + UMAC_UMAC_EEE_CTRL_REG_EEE_EN); + REG_WR(bp, umac_base + UMAC_REG_EEE_WAKE_TIMER, 0x11); + } else { + REG_WR(bp, umac_base + UMAC_REG_UMAC_EEE_CTRL, 0x0); + } + /* Set MAC address for source TX Pause/PFC frames (under SW reset) */ REG_WR(bp, umac_base + UMAC_REG_MAC_ADDR0, ((params->mac_addr[2] << 24) | @@ -1766,11 +1703,12 @@ static void bnx2x_xmac_init(struct link_params *params, u32 max_speed) } -static void bnx2x_xmac_disable(struct link_params *params) +static void bnx2x_set_xmac_rxtx(struct link_params *params, u8 en) { u8 port = params->port; struct bnx2x *bp = params->bp; u32 pfc_ctrl, xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0; + u32 val; if (REG_RD(bp, MISC_REG_RESET_REG_2) & MISC_REGISTERS_RESET_REG_2_XMAC) { @@ -1784,7 +1722,12 @@ static void bnx2x_xmac_disable(struct link_params *params) REG_WR(bp, xmac_base + XMAC_REG_PFC_CTRL_HI, (pfc_ctrl | (1<<1))); DP(NETIF_MSG_LINK, "Disable XMAC on port %x\n", port); - REG_WR(bp, xmac_base + XMAC_REG_CTRL, 0); + val = REG_RD(bp, xmac_base + XMAC_REG_CTRL); + if (en) + val |= (XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN); + else + val &= ~(XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN); + REG_WR(bp, xmac_base + XMAC_REG_CTRL, val); } } @@ -2087,391 +2030,6 @@ static void bnx2x_update_pfc_bmac2(struct link_params *params, REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_BMAC_CONTROL, wb_data, 2); } -/* PFC BRB internal port configuration params */ -struct bnx2x_pfc_brb_threshold_val { - u32 pause_xoff; - u32 pause_xon; - u32 full_xoff; - u32 full_xon; -}; - -struct bnx2x_pfc_brb_e3b0_val { - u32 per_class_guaranty_mode; - u32 lb_guarantied_hyst; - u32 full_lb_xoff_th; - u32 full_lb_xon_threshold; - u32 lb_guarantied; - u32 mac_0_class_t_guarantied; - u32 mac_0_class_t_guarantied_hyst; - u32 mac_1_class_t_guarantied; - u32 mac_1_class_t_guarantied_hyst; -}; - -struct bnx2x_pfc_brb_th_val { - struct bnx2x_pfc_brb_threshold_val pauseable_th; - struct bnx2x_pfc_brb_threshold_val non_pauseable_th; - struct bnx2x_pfc_brb_threshold_val default_class0; - struct bnx2x_pfc_brb_threshold_val default_class1; - -}; -static int bnx2x_pfc_brb_get_config_params( - struct link_params *params, - struct bnx2x_pfc_brb_th_val *config_val) -{ - struct bnx2x *bp = params->bp; - DP(NETIF_MSG_LINK, "Setting PFC BRB configuration\n"); - - config_val->default_class1.pause_xoff = 0; - config_val->default_class1.pause_xon = 0; - config_val->default_class1.full_xoff = 0; - config_val->default_class1.full_xon = 0; - - if (CHIP_IS_E2(bp)) { - /* Class0 defaults */ - config_val->default_class0.pause_xoff = - DEFAULT0_E2_BRB_MAC_PAUSE_XOFF_THR; - config_val->default_class0.pause_xon = - DEFAULT0_E2_BRB_MAC_PAUSE_XON_THR; - config_val->default_class0.full_xoff = - DEFAULT0_E2_BRB_MAC_FULL_XOFF_THR; - config_val->default_class0.full_xon = - DEFAULT0_E2_BRB_MAC_FULL_XON_THR; - /* Pause able*/ - config_val->pauseable_th.pause_xoff = - PFC_E2_BRB_MAC_PAUSE_XOFF_THR_PAUSE; - config_val->pauseable_th.pause_xon = - PFC_E2_BRB_MAC_PAUSE_XON_THR_PAUSE; - config_val->pauseable_th.full_xoff = - PFC_E2_BRB_MAC_FULL_XOFF_THR_PAUSE; - config_val->pauseable_th.full_xon = - PFC_E2_BRB_MAC_FULL_XON_THR_PAUSE; - /* Non pause able*/ - config_val->non_pauseable_th.pause_xoff = - PFC_E2_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.pause_xon = - PFC_E2_BRB_MAC_PAUSE_XON_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xoff = - PFC_E2_BRB_MAC_FULL_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xon = - PFC_E2_BRB_MAC_FULL_XON_THR_NON_PAUSE; - } else if (CHIP_IS_E3A0(bp)) { - /* Class0 defaults */ - config_val->default_class0.pause_xoff = - DEFAULT0_E3A0_BRB_MAC_PAUSE_XOFF_THR; - config_val->default_class0.pause_xon = - DEFAULT0_E3A0_BRB_MAC_PAUSE_XON_THR; - config_val->default_class0.full_xoff = - DEFAULT0_E3A0_BRB_MAC_FULL_XOFF_THR; - config_val->default_class0.full_xon = - DEFAULT0_E3A0_BRB_MAC_FULL_XON_THR; - /* Pause able */ - config_val->pauseable_th.pause_xoff = - PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_PAUSE; - config_val->pauseable_th.pause_xon = - PFC_E3A0_BRB_MAC_PAUSE_XON_THR_PAUSE; - config_val->pauseable_th.full_xoff = - PFC_E3A0_BRB_MAC_FULL_XOFF_THR_PAUSE; - config_val->pauseable_th.full_xon = - PFC_E3A0_BRB_MAC_FULL_XON_THR_PAUSE; - /* Non pause able*/ - config_val->non_pauseable_th.pause_xoff = - PFC_E3A0_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.pause_xon = - PFC_E3A0_BRB_MAC_PAUSE_XON_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xoff = - PFC_E3A0_BRB_MAC_FULL_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xon = - PFC_E3A0_BRB_MAC_FULL_XON_THR_NON_PAUSE; - } else if (CHIP_IS_E3B0(bp)) { - /* Class0 defaults */ - config_val->default_class0.pause_xoff = - DEFAULT0_E3B0_BRB_MAC_PAUSE_XOFF_THR; - config_val->default_class0.pause_xon = - DEFAULT0_E3B0_BRB_MAC_PAUSE_XON_THR; - config_val->default_class0.full_xoff = - DEFAULT0_E3B0_BRB_MAC_FULL_XOFF_THR; - config_val->default_class0.full_xon = - DEFAULT0_E3B0_BRB_MAC_FULL_XON_THR; - - if (params->phy[INT_PHY].flags & - FLAGS_4_PORT_MODE) { - config_val->pauseable_th.pause_xoff = - PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_PAUSE; - config_val->pauseable_th.pause_xon = - PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_PAUSE; - config_val->pauseable_th.full_xoff = - PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_PAUSE; - config_val->pauseable_th.full_xon = - PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_PAUSE; - /* Non pause able*/ - config_val->non_pauseable_th.pause_xoff = - PFC_E3B0_4P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.pause_xon = - PFC_E3B0_4P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xoff = - PFC_E3B0_4P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xon = - PFC_E3B0_4P_BRB_MAC_FULL_XON_THR_NON_PAUSE; - } else { - config_val->pauseable_th.pause_xoff = - PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_PAUSE; - config_val->pauseable_th.pause_xon = - PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_PAUSE; - config_val->pauseable_th.full_xoff = - PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_PAUSE; - config_val->pauseable_th.full_xon = - PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_PAUSE; - /* Non pause able*/ - config_val->non_pauseable_th.pause_xoff = - PFC_E3B0_2P_BRB_MAC_PAUSE_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.pause_xon = - PFC_E3B0_2P_BRB_MAC_PAUSE_XON_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xoff = - PFC_E3B0_2P_BRB_MAC_FULL_XOFF_THR_NON_PAUSE; - config_val->non_pauseable_th.full_xon = - PFC_E3B0_2P_BRB_MAC_FULL_XON_THR_NON_PAUSE; - } - } else - return -EINVAL; - - return 0; -} - -static void bnx2x_pfc_brb_get_e3b0_config_params( - struct link_params *params, - struct bnx2x_pfc_brb_e3b0_val - *e3b0_val, - struct bnx2x_nig_brb_pfc_port_params *pfc_params, - const u8 pfc_enabled) -{ - if (pfc_enabled && pfc_params) { - e3b0_val->per_class_guaranty_mode = 1; - e3b0_val->lb_guarantied_hyst = 80; - - if (params->phy[INT_PHY].flags & - FLAGS_4_PORT_MODE) { - e3b0_val->full_lb_xoff_th = - PFC_E3B0_4P_BRB_FULL_LB_XOFF_THR; - e3b0_val->full_lb_xon_threshold = - PFC_E3B0_4P_BRB_FULL_LB_XON_THR; - e3b0_val->lb_guarantied = - PFC_E3B0_4P_LB_GUART; - e3b0_val->mac_0_class_t_guarantied = - PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART; - e3b0_val->mac_0_class_t_guarantied_hyst = - PFC_E3B0_4P_BRB_MAC_0_CLASS_T_GUART_HYST; - e3b0_val->mac_1_class_t_guarantied = - PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART; - e3b0_val->mac_1_class_t_guarantied_hyst = - PFC_E3B0_4P_BRB_MAC_1_CLASS_T_GUART_HYST; - } else { - e3b0_val->full_lb_xoff_th = - PFC_E3B0_2P_BRB_FULL_LB_XOFF_THR; - e3b0_val->full_lb_xon_threshold = - PFC_E3B0_2P_BRB_FULL_LB_XON_THR; - e3b0_val->mac_0_class_t_guarantied_hyst = - PFC_E3B0_2P_BRB_MAC_0_CLASS_T_GUART_HYST; - e3b0_val->mac_1_class_t_guarantied = - PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART; - e3b0_val->mac_1_class_t_guarantied_hyst = - PFC_E3B0_2P_BRB_MAC_1_CLASS_T_GUART_HYST; - - if (pfc_params->cos0_pauseable != - pfc_params->cos1_pauseable) { - /* Nonpauseable= Lossy + pauseable = Lossless*/ - e3b0_val->lb_guarantied = - PFC_E3B0_2P_MIX_PAUSE_LB_GUART; - e3b0_val->mac_0_class_t_guarantied = - PFC_E3B0_2P_MIX_PAUSE_MAC_0_CLASS_T_GUART; - } else if (pfc_params->cos0_pauseable) { - /* Lossless +Lossless*/ - e3b0_val->lb_guarantied = - PFC_E3B0_2P_PAUSE_LB_GUART; - e3b0_val->mac_0_class_t_guarantied = - PFC_E3B0_2P_PAUSE_MAC_0_CLASS_T_GUART; - } else { - /* Lossy +Lossy*/ - e3b0_val->lb_guarantied = - PFC_E3B0_2P_NON_PAUSE_LB_GUART; - e3b0_val->mac_0_class_t_guarantied = - PFC_E3B0_2P_NON_PAUSE_MAC_0_CLASS_T_GUART; - } - } - } else { - e3b0_val->per_class_guaranty_mode = 0; - e3b0_val->lb_guarantied_hyst = 0; - e3b0_val->full_lb_xoff_th = - DEFAULT_E3B0_BRB_FULL_LB_XOFF_THR; - e3b0_val->full_lb_xon_threshold = - DEFAULT_E3B0_BRB_FULL_LB_XON_THR; - e3b0_val->lb_guarantied = - DEFAULT_E3B0_LB_GUART; - e3b0_val->mac_0_class_t_guarantied = - DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART; - e3b0_val->mac_0_class_t_guarantied_hyst = - DEFAULT_E3B0_BRB_MAC_0_CLASS_T_GUART_HYST; - e3b0_val->mac_1_class_t_guarantied = - DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART; - e3b0_val->mac_1_class_t_guarantied_hyst = - DEFAULT_E3B0_BRB_MAC_1_CLASS_T_GUART_HYST; - } -} -static int bnx2x_update_pfc_brb(struct link_params *params, - struct link_vars *vars, - struct bnx2x_nig_brb_pfc_port_params - *pfc_params) -{ - struct bnx2x *bp = params->bp; - struct bnx2x_pfc_brb_th_val config_val = { {0} }; - struct bnx2x_pfc_brb_threshold_val *reg_th_config = - &config_val.pauseable_th; - struct bnx2x_pfc_brb_e3b0_val e3b0_val = {0}; - const int set_pfc = params->feature_config_flags & - FEATURE_CONFIG_PFC_ENABLED; - const u8 pfc_enabled = (set_pfc && pfc_params); - int bnx2x_status = 0; - u8 port = params->port; - - /* default - pause configuration */ - reg_th_config = &config_val.pauseable_th; - bnx2x_status = bnx2x_pfc_brb_get_config_params(params, &config_val); - if (bnx2x_status) - return bnx2x_status; - - if (pfc_enabled) { - /* First COS */ - if (pfc_params->cos0_pauseable) - reg_th_config = &config_val.pauseable_th; - else - reg_th_config = &config_val.non_pauseable_th; - } else - reg_th_config = &config_val.default_class0; - /* The number of free blocks below which the pause signal to class 0 - * of MAC #n is asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_PAUSE_0_XOFF_THRESHOLD_1 : - BRB1_REG_PAUSE_0_XOFF_THRESHOLD_0 , - reg_th_config->pause_xoff); - /* The number of free blocks above which the pause signal to class 0 - * of MAC #n is de-asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_PAUSE_0_XON_THRESHOLD_1 : - BRB1_REG_PAUSE_0_XON_THRESHOLD_0 , reg_th_config->pause_xon); - /* The number of free blocks below which the full signal to class 0 - * of MAC #n is asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_FULL_0_XOFF_THRESHOLD_1 : - BRB1_REG_FULL_0_XOFF_THRESHOLD_0 , reg_th_config->full_xoff); - /* The number of free blocks above which the full signal to class 0 - * of MAC #n is de-asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_FULL_0_XON_THRESHOLD_1 : - BRB1_REG_FULL_0_XON_THRESHOLD_0 , reg_th_config->full_xon); - - if (pfc_enabled) { - /* Second COS */ - if (pfc_params->cos1_pauseable) - reg_th_config = &config_val.pauseable_th; - else - reg_th_config = &config_val.non_pauseable_th; - } else - reg_th_config = &config_val.default_class1; - /* The number of free blocks below which the pause signal to - * class 1 of MAC #n is asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_PAUSE_1_XOFF_THRESHOLD_1 : - BRB1_REG_PAUSE_1_XOFF_THRESHOLD_0, - reg_th_config->pause_xoff); - - /* The number of free blocks above which the pause signal to - * class 1 of MAC #n is de-asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_PAUSE_1_XON_THRESHOLD_1 : - BRB1_REG_PAUSE_1_XON_THRESHOLD_0, - reg_th_config->pause_xon); - /* The number of free blocks below which the full signal to - * class 1 of MAC #n is asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_FULL_1_XOFF_THRESHOLD_1 : - BRB1_REG_FULL_1_XOFF_THRESHOLD_0, - reg_th_config->full_xoff); - /* The number of free blocks above which the full signal to - * class 1 of MAC #n is de-asserted. n=0,1 - */ - REG_WR(bp, (port) ? BRB1_REG_FULL_1_XON_THRESHOLD_1 : - BRB1_REG_FULL_1_XON_THRESHOLD_0, - reg_th_config->full_xon); - - if (CHIP_IS_E3B0(bp)) { - bnx2x_pfc_brb_get_e3b0_config_params( - params, - &e3b0_val, - pfc_params, - pfc_enabled); - - REG_WR(bp, BRB1_REG_PER_CLASS_GUARANTY_MODE, - e3b0_val.per_class_guaranty_mode); - - /* The hysteresis on the guarantied buffer space for the Lb - * port before signaling XON. - */ - REG_WR(bp, BRB1_REG_LB_GUARANTIED_HYST, - e3b0_val.lb_guarantied_hyst); - - /* The number of free blocks below which the full signal to the - * LB port is asserted. - */ - REG_WR(bp, BRB1_REG_FULL_LB_XOFF_THRESHOLD, - e3b0_val.full_lb_xoff_th); - /* The number of free blocks above which the full signal to the - * LB port is de-asserted. - */ - REG_WR(bp, BRB1_REG_FULL_LB_XON_THRESHOLD, - e3b0_val.full_lb_xon_threshold); - /* The number of blocks guarantied for the MAC #n port. n=0,1 - */ - - /* The number of blocks guarantied for the LB port. */ - REG_WR(bp, BRB1_REG_LB_GUARANTIED, - e3b0_val.lb_guarantied); - - /* The number of blocks guarantied for the MAC #n port. */ - REG_WR(bp, BRB1_REG_MAC_GUARANTIED_0, - 2 * e3b0_val.mac_0_class_t_guarantied); - REG_WR(bp, BRB1_REG_MAC_GUARANTIED_1, - 2 * e3b0_val.mac_1_class_t_guarantied); - /* The number of blocks guarantied for class #t in MAC0. t=0,1 - */ - REG_WR(bp, BRB1_REG_MAC_0_CLASS_0_GUARANTIED, - e3b0_val.mac_0_class_t_guarantied); - REG_WR(bp, BRB1_REG_MAC_0_CLASS_1_GUARANTIED, - e3b0_val.mac_0_class_t_guarantied); - /* The hysteresis on the guarantied buffer space for class in - * MAC0. t=0,1 - */ - REG_WR(bp, BRB1_REG_MAC_0_CLASS_0_GUARANTIED_HYST, - e3b0_val.mac_0_class_t_guarantied_hyst); - REG_WR(bp, BRB1_REG_MAC_0_CLASS_1_GUARANTIED_HYST, - e3b0_val.mac_0_class_t_guarantied_hyst); - - /* The number of blocks guarantied for class #t in MAC1.t=0,1 - */ - REG_WR(bp, BRB1_REG_MAC_1_CLASS_0_GUARANTIED, - e3b0_val.mac_1_class_t_guarantied); - REG_WR(bp, BRB1_REG_MAC_1_CLASS_1_GUARANTIED, - e3b0_val.mac_1_class_t_guarantied); - /* The hysteresis on the guarantied buffer space for class #t - * in MAC1. t=0,1 - */ - REG_WR(bp, BRB1_REG_MAC_1_CLASS_0_GUARANTIED_HYST, - e3b0_val.mac_1_class_t_guarantied_hyst); - REG_WR(bp, BRB1_REG_MAC_1_CLASS_1_GUARANTIED_HYST, - e3b0_val.mac_1_class_t_guarantied_hyst); - } - - return bnx2x_status; -} - /****************************************************************************** * Description: * This function is needed because NIG ARB_CREDIT_WEIGHT_X are @@ -2529,16 +2087,6 @@ static void bnx2x_update_mng(struct link_params *params, u32 link_status) port_mb[params->port].link_status), link_status); } -static void bnx2x_update_mng_eee(struct link_params *params, u32 eee_status) -{ - struct bnx2x *bp = params->bp; - - if (bnx2x_eee_has_cap(params)) - REG_WR(bp, params->shmem2_base + - offsetof(struct shmem2_region, - eee_status[params->port]), eee_status); -} - static void bnx2x_update_pfc_nig(struct link_params *params, struct link_vars *vars, struct bnx2x_nig_brb_pfc_port_params *nig_params) @@ -2658,11 +2206,6 @@ int bnx2x_update_pfc(struct link_params *params, /* Update NIG params */ bnx2x_update_pfc_nig(params, vars, pfc_params); - /* Update BRB params */ - bnx2x_status = bnx2x_update_pfc_brb(params, vars, pfc_params); - if (bnx2x_status) - return bnx2x_status; - if (!vars->link_up) return bnx2x_status; @@ -2827,16 +2370,18 @@ static int bnx2x_bmac2_enable(struct link_params *params, static int bnx2x_bmac_enable(struct link_params *params, struct link_vars *vars, - u8 is_lb) + u8 is_lb, u8 reset_bmac) { int rc = 0; u8 port = params->port; struct bnx2x *bp = params->bp; u32 val; /* Reset and unreset the BigMac */ - REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, - (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); - usleep_range(1000, 2000); + if (reset_bmac) { + REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR, + (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); + usleep_range(1000, 2000); + } REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); @@ -2868,37 +2413,28 @@ static int bnx2x_bmac_enable(struct link_params *params, return rc; } -static void bnx2x_bmac_rx_disable(struct bnx2x *bp, u8 port) +static void bnx2x_set_bmac_rx(struct bnx2x *bp, u32 chip_id, u8 port, u8 en) { u32 bmac_addr = port ? NIG_REG_INGRESS_BMAC1_MEM : NIG_REG_INGRESS_BMAC0_MEM; u32 wb_data[2]; u32 nig_bmac_enable = REG_RD(bp, NIG_REG_BMAC0_REGS_OUT_EN + port*4); + if (CHIP_IS_E2(bp)) + bmac_addr += BIGMAC2_REGISTER_BMAC_CONTROL; + else + bmac_addr += BIGMAC_REGISTER_BMAC_CONTROL; /* Only if the bmac is out of reset */ if (REG_RD(bp, MISC_REG_RESET_REG_2) & (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port) && nig_bmac_enable) { - - if (CHIP_IS_E2(bp)) { - /* Clear Rx Enable bit in BMAC_CONTROL register */ - REG_RD_DMAE(bp, bmac_addr + - BIGMAC2_REGISTER_BMAC_CONTROL, - wb_data, 2); - wb_data[0] &= ~BMAC_CONTROL_RX_ENABLE; - REG_WR_DMAE(bp, bmac_addr + - BIGMAC2_REGISTER_BMAC_CONTROL, - wb_data, 2); - } else { - /* Clear Rx Enable bit in BMAC_CONTROL register */ - REG_RD_DMAE(bp, bmac_addr + - BIGMAC_REGISTER_BMAC_CONTROL, - wb_data, 2); + /* Clear Rx Enable bit in BMAC_CONTROL register */ + REG_RD_DMAE(bp, bmac_addr, wb_data, 2); + if (en) + wb_data[0] |= BMAC_CONTROL_RX_ENABLE; + else wb_data[0] &= ~BMAC_CONTROL_RX_ENABLE; - REG_WR_DMAE(bp, bmac_addr + - BIGMAC_REGISTER_BMAC_CONTROL, - wb_data, 2); - } + REG_WR_DMAE(bp, bmac_addr, wb_data, 2); usleep_range(1000, 2000); } } @@ -3233,6 +2769,245 @@ static int bnx2x_cl45_write(struct bnx2x *bp, struct bnx2x_phy *phy, EMAC_MDIO_STATUS_10MB); return rc; } + +/******************************************************************/ +/* EEE section */ +/******************************************************************/ +static u8 bnx2x_eee_has_cap(struct link_params *params) +{ + struct bnx2x *bp = params->bp; + + if (REG_RD(bp, params->shmem2_base) <= + offsetof(struct shmem2_region, eee_status[params->port])) + return 0; + + return 1; +} + +static int bnx2x_eee_nvram_to_time(u32 nvram_mode, u32 *idle_timer) +{ + switch (nvram_mode) { + case PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED: + *idle_timer = EEE_MODE_NVRAM_BALANCED_TIME; + break; + case PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE: + *idle_timer = EEE_MODE_NVRAM_AGGRESSIVE_TIME; + break; + case PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY: + *idle_timer = EEE_MODE_NVRAM_LATENCY_TIME; + break; + default: + *idle_timer = 0; + break; + } + + return 0; +} + +static int bnx2x_eee_time_to_nvram(u32 idle_timer, u32 *nvram_mode) +{ + switch (idle_timer) { + case EEE_MODE_NVRAM_BALANCED_TIME: + *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED; + break; + case EEE_MODE_NVRAM_AGGRESSIVE_TIME: + *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_AGGRESSIVE; + break; + case EEE_MODE_NVRAM_LATENCY_TIME: + *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_LOW_LATENCY; + break; + default: + *nvram_mode = PORT_FEAT_CFG_EEE_POWER_MODE_DISABLED; + break; + } + + return 0; +} + +static u32 bnx2x_eee_calc_timer(struct link_params *params) +{ + u32 eee_mode, eee_idle; + struct bnx2x *bp = params->bp; + + if (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) { + if (params->eee_mode & EEE_MODE_OUTPUT_TIME) { + /* time value in eee_mode --> used directly*/ + eee_idle = params->eee_mode & EEE_MODE_TIMER_MASK; + } else { + /* hsi value in eee_mode --> time */ + if (bnx2x_eee_nvram_to_time(params->eee_mode & + EEE_MODE_NVRAM_MASK, + &eee_idle)) + return 0; + } + } else { + /* hsi values in nvram --> time*/ + eee_mode = ((REG_RD(bp, params->shmem_base + + offsetof(struct shmem_region, dev_info. + port_feature_config[params->port]. + eee_power_mode)) & + PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >> + PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT); + + if (bnx2x_eee_nvram_to_time(eee_mode, &eee_idle)) + return 0; + } + + return eee_idle; +} + +static int bnx2x_eee_set_timers(struct link_params *params, + struct link_vars *vars) +{ + u32 eee_idle = 0, eee_mode; + struct bnx2x *bp = params->bp; + + eee_idle = bnx2x_eee_calc_timer(params); + + if (eee_idle) { + REG_WR(bp, MISC_REG_CPMU_LP_IDLE_THR_P0 + (params->port << 2), + eee_idle); + } else if ((params->eee_mode & EEE_MODE_ENABLE_LPI) && + (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) && + (params->eee_mode & EEE_MODE_OUTPUT_TIME)) { + DP(NETIF_MSG_LINK, "Error: Tx LPI is enabled with timer 0\n"); + return -EINVAL; + } + + vars->eee_status &= ~(SHMEM_EEE_TIMER_MASK | SHMEM_EEE_TIME_OUTPUT_BIT); + if (params->eee_mode & EEE_MODE_OUTPUT_TIME) { + /* eee_idle in 1u --> eee_status in 16u */ + eee_idle >>= 4; + vars->eee_status |= (eee_idle & SHMEM_EEE_TIMER_MASK) | + SHMEM_EEE_TIME_OUTPUT_BIT; + } else { + if (bnx2x_eee_time_to_nvram(eee_idle, &eee_mode)) + return -EINVAL; + vars->eee_status |= eee_mode; + } + + return 0; +} + +static int bnx2x_eee_initial_config(struct link_params *params, + struct link_vars *vars, u8 mode) +{ + vars->eee_status |= ((u32) mode) << SHMEM_EEE_SUPPORTED_SHIFT; + + /* Propogate params' bits --> vars (for migration exposure) */ + if (params->eee_mode & EEE_MODE_ENABLE_LPI) + vars->eee_status |= SHMEM_EEE_LPI_REQUESTED_BIT; + else + vars->eee_status &= ~SHMEM_EEE_LPI_REQUESTED_BIT; + + if (params->eee_mode & EEE_MODE_ADV_LPI) + vars->eee_status |= SHMEM_EEE_REQUESTED_BIT; + else + vars->eee_status &= ~SHMEM_EEE_REQUESTED_BIT; + + return bnx2x_eee_set_timers(params, vars); +} + +static int bnx2x_eee_disable(struct bnx2x_phy *phy, + struct link_params *params, + struct link_vars *vars) +{ + struct bnx2x *bp = params->bp; + + /* Make Certain LPI is disabled */ + REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2), 0); + + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0x0); + + vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK; + + return 0; +} + +static int bnx2x_eee_advertise(struct bnx2x_phy *phy, + struct link_params *params, + struct link_vars *vars, u8 modes) +{ + struct bnx2x *bp = params->bp; + u16 val = 0; + + /* Mask events preventing LPI generation */ + REG_WR(bp, MISC_REG_CPMU_LP_MASK_EXT_P0 + (params->port << 2), 0xfc20); + + if (modes & SHMEM_EEE_10G_ADV) { + DP(NETIF_MSG_LINK, "Advertise 10GBase-T EEE\n"); + val |= 0x8; + } + if (modes & SHMEM_EEE_1G_ADV) { + DP(NETIF_MSG_LINK, "Advertise 1GBase-T EEE\n"); + val |= 0x4; + } + + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, val); + + vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK; + vars->eee_status |= (modes << SHMEM_EEE_ADV_STATUS_SHIFT); + + return 0; +} + +static void bnx2x_update_mng_eee(struct link_params *params, u32 eee_status) +{ + struct bnx2x *bp = params->bp; + + if (bnx2x_eee_has_cap(params)) + REG_WR(bp, params->shmem2_base + + offsetof(struct shmem2_region, + eee_status[params->port]), eee_status); +} + +static void bnx2x_eee_an_resolve(struct bnx2x_phy *phy, + struct link_params *params, + struct link_vars *vars) +{ + struct bnx2x *bp = params->bp; + u16 adv = 0, lp = 0; + u32 lp_adv = 0; + u8 neg = 0; + + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, &adv); + bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_LP_EEE_ADV, &lp); + + if (lp & 0x2) { + lp_adv |= SHMEM_EEE_100M_ADV; + if (adv & 0x2) { + if (vars->line_speed == SPEED_100) + neg = 1; + DP(NETIF_MSG_LINK, "EEE negotiated - 100M\n"); + } + } + if (lp & 0x14) { + lp_adv |= SHMEM_EEE_1G_ADV; + if (adv & 0x14) { + if (vars->line_speed == SPEED_1000) + neg = 1; + DP(NETIF_MSG_LINK, "EEE negotiated - 1G\n"); + } + } + if (lp & 0x68) { + lp_adv |= SHMEM_EEE_10G_ADV; + if (adv & 0x68) { + if (vars->line_speed == SPEED_10000) + neg = 1; + DP(NETIF_MSG_LINK, "EEE negotiated - 10G\n"); + } + } + + vars->eee_status &= ~SHMEM_EEE_LP_ADV_STATUS_MASK; + vars->eee_status |= (lp_adv << SHMEM_EEE_LP_ADV_STATUS_SHIFT); + + if (neg) { + DP(NETIF_MSG_LINK, "EEE is active\n"); + vars->eee_status |= SHMEM_EEE_ACTIVE_BIT; + } + +} + /******************************************************************/ /* BSC access functions from E3 */ /******************************************************************/ @@ -3754,6 +3529,19 @@ static u8 bnx2x_ext_phy_resolve_fc(struct bnx2x_phy *phy, * init configuration, and set/clear SGMII flag. Internal * phy init is done purely in phy_init stage. */ + +static void bnx2x_warpcore_set_lpi_passthrough(struct bnx2x_phy *phy, + struct link_params *params) +{ + struct bnx2x *bp = params->bp; + + DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n"); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_EEE_COMBO_CONTROL0, 0x7c); + bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_DIGITAL4_MISC5, 0xc000); +} + static void bnx2x_warpcore_enable_AN_KR(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) { @@ -4013,13 +3801,7 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy, bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL4_MISC3, 0x8080); - /* Enable LPI pass through */ - DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n"); - bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_EEE_COMBO_CONTROL0, - 0x7c); - bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_DIGITAL4_MISC5, 0xc000); + bnx2x_warpcore_set_lpi_passthrough(phy, params); /* 10G XFI Full Duplex */ bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, @@ -4116,6 +3898,8 @@ static void bnx2x_warpcore_set_sgmii_speed(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, MDIO_WC_REG_RX66_CONTROL, val16 & ~(3<<13)); + bnx2x_warpcore_set_lpi_passthrough(phy, params); + if (always_autoneg || phy->req_line_speed == SPEED_AUTO_NEG) { /* SGMII Autoneg */ bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, @@ -4409,7 +4193,7 @@ static void bnx2x_warpcore_config_init(struct bnx2x_phy *phy, "serdes_net_if = 0x%x\n", vars->line_speed, serdes_net_if); bnx2x_set_aer_mmd(params, phy); - + bnx2x_warpcore_reset_lane(bp, phy, 1); vars->phy_flags |= PHY_XGXS_FLAG; if ((serdes_net_if == PORT_HW_CFG_NET_SERDES_IF_SGMII) || (phy->req_line_speed && @@ -4718,6 +4502,10 @@ void bnx2x_link_status_update(struct link_params *params, vars->link_status = REG_RD(bp, params->shmem_base + offsetof(struct shmem_region, port_mb[port].link_status)); + if (bnx2x_eee_has_cap(params)) + vars->eee_status = REG_RD(bp, params->shmem2_base + + offsetof(struct shmem2_region, + eee_status[params->port])); vars->phy_flags = PHY_XGXS_FLAG; bnx2x_sync_link(params, vars); @@ -6530,25 +6318,21 @@ static int bnx2x_update_link_down(struct link_params *params, usleep_range(10000, 20000); /* Reset BigMac/Xmac */ if (CHIP_IS_E1x(bp) || - CHIP_IS_E2(bp)) { - bnx2x_bmac_rx_disable(bp, params->port); - REG_WR(bp, GRCBASE_MISC + - MISC_REGISTERS_RESET_REG_2_CLEAR, - (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); - } + CHIP_IS_E2(bp)) + bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 0); + if (CHIP_IS_E3(bp)) { /* Prevent LPI Generation by chip */ REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2), 0); - REG_WR(bp, MISC_REG_CPMU_LP_DR_ENABLE, 0); REG_WR(bp, MISC_REG_CPMU_LP_MASK_ENT_P0 + (params->port << 2), 0); vars->eee_status &= ~(SHMEM_EEE_LP_ADV_STATUS_MASK | SHMEM_EEE_ACTIVE_BIT); bnx2x_update_mng_eee(params, vars->eee_status); - bnx2x_xmac_disable(params); - bnx2x_umac_disable(params); + bnx2x_set_xmac_rxtx(params, 0); + bnx2x_set_umac_rxtx(params, 0); } return 0; @@ -6600,7 +6384,7 @@ static int bnx2x_update_link_up(struct link_params *params, if ((CHIP_IS_E1x(bp) || CHIP_IS_E2(bp))) { if (link_10g) { - if (bnx2x_bmac_enable(params, vars, 0) == + if (bnx2x_bmac_enable(params, vars, 0, 1) == -ESRCH) { DP(NETIF_MSG_LINK, "Found errors on BMAC\n"); vars->link_up = 0; @@ -7207,6 +6991,22 @@ static void bnx2x_8073_set_pause_cl37(struct link_params *params, msleep(500); } +static void bnx2x_8073_specific_func(struct bnx2x_phy *phy, + struct link_params *params, + u32 action) +{ + struct bnx2x *bp = params->bp; + switch (action) { + case PHY_INIT: + /* Enable LASI */ + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, (1<<2)); + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0004); + break; + } +} + static int bnx2x_8073_config_init(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) @@ -7227,12 +7027,7 @@ static int bnx2x_8073_config_init(struct bnx2x_phy *phy, bnx2x_set_gpio(bp, MISC_REGISTERS_GPIO_1, MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port); - /* Enable LASI */ - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, (1<<2)); - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0004); - + bnx2x_8073_specific_func(phy, params, PHY_INIT); bnx2x_8073_set_pause_cl37(params, phy, vars); bnx2x_cl45_read(bp, phy, @@ -8267,7 +8062,7 @@ static void bnx2x_8727_specific_func(struct bnx2x_phy *phy, u32 action) { struct bnx2x *bp = params->bp; - + u16 val; switch (action) { case DISABLE_TX: bnx2x_sfp_set_transmitter(params, phy, 0); @@ -8276,6 +8071,40 @@ static void bnx2x_8727_specific_func(struct bnx2x_phy *phy, if (!(phy->flags & FLAGS_SFP_NOT_APPROVED)) bnx2x_sfp_set_transmitter(params, phy, 1); break; + case PHY_INIT: + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, + (1<<2) | (1<<5)); + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL, + 0); + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0006); + /* Make MOD_ABS give interrupt on change */ + bnx2x_cl45_read(bp, phy, MDIO_PMA_DEVAD, + MDIO_PMA_REG_8727_PCS_OPT_CTRL, + &val); + val |= (1<<12); + if (phy->flags & FLAGS_NOC) + val |= (3<<5); + /* Set 8727 GPIOs to input to allow reading from the 8727 GPIO0 + * status which reflect SFP+ module over-current + */ + if (!(phy->flags & FLAGS_NOC)) + val &= 0xff8f; /* Reset bits 4-6 */ + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, + val); + + /* Set 2-wire transfer rate of SFP+ module EEPROM + * to 100Khz since some DACs(direct attached cables) do + * not work at 400Khz. + */ + bnx2x_cl45_write(bp, phy, + MDIO_PMA_DEVAD, + MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR, + 0xa001); + break; default: DP(NETIF_MSG_LINK, "Function 0x%x not supported by 8727\n", action); @@ -9058,28 +8887,15 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy, struct link_vars *vars) { u32 tx_en_mode; - u16 tmp1, val, mod_abs, tmp2; - u16 rx_alarm_ctrl_val; - u16 lasi_ctrl_val; + u16 tmp1, mod_abs, tmp2; struct bnx2x *bp = params->bp; /* Enable PMD link, MOD_ABS_FLT, and 1G link alarm */ bnx2x_wait_reset_complete(bp, phy, params); - rx_alarm_ctrl_val = (1<<2) | (1<<5) ; - /* Should be 0x6 to enable XS on Tx side. */ - lasi_ctrl_val = 0x0006; DP(NETIF_MSG_LINK, "Initializing BCM8727\n"); - /* Enable LASI */ - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, - rx_alarm_ctrl_val); - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL, - 0); - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, lasi_ctrl_val); + bnx2x_8727_specific_func(phy, params, PHY_INIT); /* Initially configure MOD_ABS to interrupt when module is * presence( bit 8) */ @@ -9095,25 +8911,9 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy, bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, mod_abs); - /* Enable/Disable PHY transmitter output */ bnx2x_set_disable_pmd_transmit(params, phy, 0); - /* Make MOD_ABS give interrupt on change */ - bnx2x_cl45_read(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, - &val); - val |= (1<<12); - if (phy->flags & FLAGS_NOC) - val |= (3<<5); - - /* Set 8727 GPIOs to input to allow reading from the 8727 GPIO0 - * status which reflect SFP+ module over-current - */ - if (!(phy->flags & FLAGS_NOC)) - val &= 0xff8f; /* Reset bits 4-6 */ - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, val); - bnx2x_8727_power_module(bp, phy, 1); bnx2x_cl45_read(bp, phy, @@ -9123,13 +8923,7 @@ static int bnx2x_8727_config_init(struct bnx2x_phy *phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1); bnx2x_8727_config_speed(phy, params); - /* Set 2-wire transfer rate of SFP+ module EEPROM - * to 100Khz since some DACs(direct attached cables) do - * not work at 400Khz. - */ - bnx2x_cl45_write(bp, phy, - MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR, - 0xa001); + /* Set TX PreEmphasis if needed */ if ((params->feature_config_flags & @@ -9558,6 +9352,29 @@ static void bnx2x_848xx_set_led(struct bnx2x *bp, 0xFFFB, 0xFFFD); } +static void bnx2x_848xx_specific_func(struct bnx2x_phy *phy, + struct link_params *params, + u32 action) +{ + struct bnx2x *bp = params->bp; + switch (action) { + case PHY_INIT: + if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) { + /* Save spirom version */ + bnx2x_save_848xx_spirom_version(phy, bp, params->port); + } + /* This phy uses the NIG latch mechanism since link indication + * arrives through its LED4 and not via its LASI signal, so we + * get steady signal instead of clear on read + */ + bnx2x_bits_en(bp, NIG_REG_LATCH_BC_0 + params->port*4, + 1 << NIG_LATCH_BC_ENABLE_MI_INT); + + bnx2x_848xx_set_led(bp, phy); + break; + } +} + static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) @@ -9565,22 +9382,10 @@ static int bnx2x_848xx_cmn_config_init(struct bnx2x_phy *phy, struct bnx2x *bp = params->bp; u16 autoneg_val, an_1000_val, an_10_100_val, an_10g_val; - if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) { - /* Save spirom version */ - bnx2x_save_848xx_spirom_version(phy, bp, params->port); - } - /* This phy uses the NIG latch mechanism since link indication - * arrives through its LED4 and not via its LASI signal, so we - * get steady signal instead of clear on read - */ - bnx2x_bits_en(bp, NIG_REG_LATCH_BC_0 + params->port*4, - 1 << NIG_LATCH_BC_ENABLE_MI_INT); - + bnx2x_848xx_specific_func(phy, params, PHY_INIT); bnx2x_cl45_write(bp, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x0000); - bnx2x_848xx_set_led(bp, phy); - /* set 1000 speed advertisement */ bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_8481_1000T_CTRL, @@ -9887,39 +9692,6 @@ static int bnx2x_84833_hw_reset_phy(struct bnx2x_phy *phy, return 0; } -static int bnx2x_8483x_eee_timers(struct link_params *params, - struct link_vars *vars) -{ - u32 eee_idle = 0, eee_mode; - struct bnx2x *bp = params->bp; - - eee_idle = bnx2x_eee_calc_timer(params); - - if (eee_idle) { - REG_WR(bp, MISC_REG_CPMU_LP_IDLE_THR_P0 + (params->port << 2), - eee_idle); - } else if ((params->eee_mode & EEE_MODE_ENABLE_LPI) && - (params->eee_mode & EEE_MODE_OVERRIDE_NVRAM) && - (params->eee_mode & EEE_MODE_OUTPUT_TIME)) { - DP(NETIF_MSG_LINK, "Error: Tx LPI is enabled with timer 0\n"); - return -EINVAL; - } - - vars->eee_status &= ~(SHMEM_EEE_TIMER_MASK | SHMEM_EEE_TIME_OUTPUT_BIT); - if (params->eee_mode & EEE_MODE_OUTPUT_TIME) { - /* eee_idle in 1u --> eee_status in 16u */ - eee_idle >>= 4; - vars->eee_status |= (eee_idle & SHMEM_EEE_TIMER_MASK) | - SHMEM_EEE_TIME_OUTPUT_BIT; - } else { - if (bnx2x_eee_time_to_nvram(eee_idle, &eee_mode)) - return -EINVAL; - vars->eee_status |= eee_mode; - } - - return 0; -} - static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) @@ -9930,10 +9702,6 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy, DP(NETIF_MSG_LINK, "Don't Advertise 10GBase-T EEE\n"); - /* Make Certain LPI is disabled */ - REG_WR(bp, MISC_REG_CPMU_LP_FW_ENABLE_P0 + (params->port << 2), 0); - REG_WR(bp, MISC_REG_CPMU_LP_DR_ENABLE, 0); - /* Prevent Phy from working in EEE and advertising it */ rc = bnx2x_84833_cmd_hdlr(phy, params, PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1); @@ -9942,10 +9710,7 @@ static int bnx2x_8483x_disable_eee(struct bnx2x_phy *phy, return rc; } - bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0); - vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK; - - return 0; + return bnx2x_eee_disable(phy, params, vars); } static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy, @@ -9956,8 +9721,6 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy, struct bnx2x *bp = params->bp; u16 cmd_args = 1; - DP(NETIF_MSG_LINK, "Advertise 10GBase-T EEE\n"); - rc = bnx2x_84833_cmd_hdlr(phy, params, PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1); if (rc) { @@ -9965,15 +9728,7 @@ static int bnx2x_8483x_enable_eee(struct bnx2x_phy *phy, return rc; } - bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, MDIO_AN_REG_EEE_ADV, 0x8); - - /* Mask events preventing LPI generation */ - REG_WR(bp, MISC_REG_CPMU_LP_MASK_EXT_P0 + (params->port << 2), 0xfc20); - - vars->eee_status &= ~SHMEM_EEE_ADV_STATUS_MASK; - vars->eee_status |= (SHMEM_EEE_10G_ADV << SHMEM_EEE_ADV_STATUS_SHIFT); - - return 0; + return bnx2x_eee_advertise(phy, params, vars, SHMEM_EEE_10G_ADV); } #define PHY84833_CONSTANT_LATENCY 1193 @@ -10105,22 +9860,10 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy, MDIO_84833_TOP_CFG_FW_REV, &val); /* Configure EEE support */ - if ((val >= MDIO_84833_TOP_CFG_FW_EEE) && bnx2x_eee_has_cap(params)) { - phy->flags |= FLAGS_EEE_10GBT; - vars->eee_status |= SHMEM_EEE_10G_ADV << - SHMEM_EEE_SUPPORTED_SHIFT; - /* Propogate params' bits --> vars (for migration exposure) */ - if (params->eee_mode & EEE_MODE_ENABLE_LPI) - vars->eee_status |= SHMEM_EEE_LPI_REQUESTED_BIT; - else - vars->eee_status &= ~SHMEM_EEE_LPI_REQUESTED_BIT; - - if (params->eee_mode & EEE_MODE_ADV_LPI) - vars->eee_status |= SHMEM_EEE_REQUESTED_BIT; - else - vars->eee_status &= ~SHMEM_EEE_REQUESTED_BIT; - - rc = bnx2x_8483x_eee_timers(params, vars); + if ((val >= MDIO_84833_TOP_CFG_FW_EEE) && + (val != MDIO_84833_TOP_CFG_FW_NO_EEE) && + bnx2x_eee_has_cap(params)) { + rc = bnx2x_eee_initial_config(params, vars, SHMEM_EEE_10G_ADV); if (rc) { DP(NETIF_MSG_LINK, "Failed to configure EEE timers\n"); bnx2x_8483x_disable_eee(phy, params, vars); @@ -10139,7 +9882,6 @@ static int bnx2x_848x3_config_init(struct bnx2x_phy *phy, return rc; } } else { - phy->flags &= ~FLAGS_EEE_10GBT; vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK; } @@ -10278,29 +10020,8 @@ static u8 bnx2x_848xx_read_status(struct bnx2x_phy *phy, LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE; /* Determine if EEE was negotiated */ - if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) { - u32 eee_shmem = 0; - - bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, - MDIO_AN_REG_EEE_ADV, &val1); - bnx2x_cl45_read(bp, phy, MDIO_AN_DEVAD, - MDIO_AN_REG_LP_EEE_ADV, &val2); - if ((val1 & val2) & 0x8) { - DP(NETIF_MSG_LINK, "EEE negotiated\n"); - vars->eee_status |= SHMEM_EEE_ACTIVE_BIT; - } - - if (val2 & 0x12) - eee_shmem |= SHMEM_EEE_100M_ADV; - if (val2 & 0x4) - eee_shmem |= SHMEM_EEE_1G_ADV; - if (val2 & 0x68) - eee_shmem |= SHMEM_EEE_10G_ADV; - - vars->eee_status &= ~SHMEM_EEE_LP_ADV_STATUS_MASK; - vars->eee_status |= (eee_shmem << - SHMEM_EEE_LP_ADV_STATUS_SHIFT); - } + if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM84833) + bnx2x_eee_an_resolve(phy, params, vars); } return link_up; @@ -10569,6 +10290,35 @@ static void bnx2x_848xx_set_link_led(struct bnx2x_phy *phy, /******************************************************************/ /* 54618SE PHY SECTION */ /******************************************************************/ +static void bnx2x_54618se_specific_func(struct bnx2x_phy *phy, + struct link_params *params, + u32 action) +{ + struct bnx2x *bp = params->bp; + u16 temp; + switch (action) { + case PHY_INIT: + /* Configure LED4: set to INTR (0x6). */ + /* Accessing shadow register 0xe. */ + bnx2x_cl22_write(bp, phy, + MDIO_REG_GPHY_SHADOW, + MDIO_REG_GPHY_SHADOW_LED_SEL2); + bnx2x_cl22_read(bp, phy, + MDIO_REG_GPHY_SHADOW, + &temp); + temp &= ~(0xf << 4); + temp |= (0x6 << 4); + bnx2x_cl22_write(bp, phy, + MDIO_REG_GPHY_SHADOW, + MDIO_REG_GPHY_SHADOW_WR_ENA | temp); + /* Configure INTR based on link status change. */ + bnx2x_cl22_write(bp, phy, + MDIO_REG_INTR_MASK, + ~MDIO_REG_INTR_MASK_LINK_STATUS); + break; + } +} + static int bnx2x_54618se_config_init(struct bnx2x_phy *phy, struct link_params *params, struct link_vars *vars) @@ -10606,24 +10356,8 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy, /* Wait for GPHY to reset */ msleep(50); - /* Configure LED4: set to INTR (0x6). */ - /* Accessing shadow register 0xe. */ - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_SHADOW, - MDIO_REG_GPHY_SHADOW_LED_SEL2); - bnx2x_cl22_read(bp, phy, - MDIO_REG_GPHY_SHADOW, - &temp); - temp &= ~(0xf << 4); - temp |= (0x6 << 4); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_SHADOW, - MDIO_REG_GPHY_SHADOW_WR_ENA | temp); - /* Configure INTR based on link status change. */ - bnx2x_cl22_write(bp, phy, - MDIO_REG_INTR_MASK, - ~MDIO_REG_INTR_MASK_LINK_STATUS); + bnx2x_54618se_specific_func(phy, params, PHY_INIT); /* Flip the signal detect polarity (set 0x1c.0x1e[8]). */ bnx2x_cl22_write(bp, phy, MDIO_REG_GPHY_SHADOW, @@ -10728,28 +10462,52 @@ static int bnx2x_54618se_config_init(struct bnx2x_phy *phy, DP(NETIF_MSG_LINK, "Setting 10M force\n"); } - /* Check if we should turn on Auto-GrEEEn */ - bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_PHYID_LSB, &temp); - if (temp == MDIO_REG_GPHY_ID_54618SE) { - if (params->feature_config_flags & - FEATURE_CONFIG_AUTOGREEEN_ENABLED) { - temp = 6; - DP(NETIF_MSG_LINK, "Enabling Auto-GrEEEn\n"); + if ((phy->flags & FLAGS_EEE) && bnx2x_eee_has_cap(params)) { + int rc; + + bnx2x_cl22_write(bp, phy, MDIO_REG_GPHY_EXP_ACCESS, + MDIO_REG_GPHY_EXP_ACCESS_TOP | + MDIO_REG_GPHY_EXP_TOP_2K_BUF); + bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_EXP_ACCESS_GATE, &temp); + temp &= 0xfffe; + bnx2x_cl22_write(bp, phy, MDIO_REG_GPHY_EXP_ACCESS_GATE, temp); + + rc = bnx2x_eee_initial_config(params, vars, SHMEM_EEE_1G_ADV); + if (rc) { + DP(NETIF_MSG_LINK, "Failed to configure EEE timers\n"); + bnx2x_eee_disable(phy, params, vars); + } else if ((params->eee_mode & EEE_MODE_ADV_LPI) && + (phy->req_duplex == DUPLEX_FULL) && + (bnx2x_eee_calc_timer(params) || + !(params->eee_mode & EEE_MODE_ENABLE_LPI))) { + /* Need to advertise EEE only when requested, + * and either no LPI assertion was requested, + * or it was requested and a valid timer was set. + * Also notice full duplex is required for EEE. + */ + bnx2x_eee_advertise(phy, params, vars, + SHMEM_EEE_1G_ADV); } else { - temp = 0; - DP(NETIF_MSG_LINK, "Disabling Auto-GrEEEn\n"); + DP(NETIF_MSG_LINK, "Don't Advertise 1GBase-T EEE\n"); + bnx2x_eee_disable(phy, params, vars); + } + } else { + vars->eee_status &= ~SHMEM_EEE_1G_ADV << + SHMEM_EEE_SUPPORTED_SHIFT; + + if (phy->flags & FLAGS_EEE) { + /* Handle legacy auto-grEEEn */ + if (params->feature_config_flags & + FEATURE_CONFIG_AUTOGREEEN_ENABLED) { + temp = 6; + DP(NETIF_MSG_LINK, "Enabling Auto-GrEEEn\n"); + } else { + temp = 0; + DP(NETIF_MSG_LINK, "Don't Adv. EEE\n"); + } + bnx2x_cl45_write(bp, phy, MDIO_AN_DEVAD, + MDIO_AN_REG_EEE_ADV, temp); } - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_ADDR_REG, MDIO_AN_DEVAD); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_DATA_REG, - MDIO_REG_GPHY_EEE_ADV); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_ADDR_REG, - (0x1 << 14) | MDIO_AN_DEVAD); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_DATA_REG, - temp); } bnx2x_cl22_write(bp, phy, @@ -10896,29 +10654,6 @@ static u8 bnx2x_54618se_read_status(struct bnx2x_phy *phy, DP(NETIF_MSG_LINK, "BCM54618SE: link speed is %d\n", vars->line_speed); - /* Report whether EEE is resolved. */ - bnx2x_cl22_read(bp, phy, MDIO_REG_GPHY_PHYID_LSB, &val); - if (val == MDIO_REG_GPHY_ID_54618SE) { - if (vars->link_status & - LINK_STATUS_AUTO_NEGOTIATE_COMPLETE) - val = 0; - else { - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_ADDR_REG, - MDIO_AN_DEVAD); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_DATA_REG, - MDIO_REG_GPHY_EEE_RESOLVED); - bnx2x_cl22_write(bp, phy, - MDIO_REG_GPHY_CL45_ADDR_REG, - (0x1 << 14) | MDIO_AN_DEVAD); - bnx2x_cl22_read(bp, phy, - MDIO_REG_GPHY_CL45_DATA_REG, - &val); - } - DP(NETIF_MSG_LINK, "EEE resolution: 0x%x\n", val); - } - bnx2x_ext_phy_resolve_fc(phy, params, vars); if (vars->link_status & LINK_STATUS_AUTO_NEGOTIATE_COMPLETE) { @@ -10948,6 +10683,10 @@ static u8 bnx2x_54618se_read_status(struct bnx2x_phy *phy, if (val & (1<<11)) vars->link_status |= LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE; + + if ((phy->flags & FLAGS_EEE) && + bnx2x_eee_has_cap(params)) + bnx2x_eee_an_resolve(phy, params, vars); } } return link_up; @@ -11353,7 +11092,7 @@ static struct bnx2x_phy phy_8073 = { .format_fw_ver = (format_fw_ver_t)bnx2x_format_ver, .hw_reset = (hw_reset_t)NULL, .set_link_led = (set_link_led_t)NULL, - .phy_specific_func = (phy_specific_func_t)NULL + .phy_specific_func = (phy_specific_func_t)bnx2x_8073_specific_func }; static struct bnx2x_phy phy_8705 = { .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM8705, @@ -11546,7 +11285,7 @@ static struct bnx2x_phy phy_84823 = { .format_fw_ver = (format_fw_ver_t)bnx2x_848xx_format_ver, .hw_reset = (hw_reset_t)NULL, .set_link_led = (set_link_led_t)bnx2x_848xx_set_link_led, - .phy_specific_func = (phy_specific_func_t)NULL + .phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func }; static struct bnx2x_phy phy_84833 = { @@ -11555,8 +11294,7 @@ static struct bnx2x_phy phy_84833 = { .def_md_devad = 0, .flags = (FLAGS_FAN_FAILURE_DET_REQ | FLAGS_REARM_LATCH_SIGNAL | - FLAGS_TX_ERROR_CHECK | - FLAGS_EEE_10GBT), + FLAGS_TX_ERROR_CHECK), .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, .mdio_ctrl = 0, @@ -11582,7 +11320,7 @@ static struct bnx2x_phy phy_84833 = { .format_fw_ver = (format_fw_ver_t)bnx2x_848xx_format_ver, .hw_reset = (hw_reset_t)bnx2x_84833_hw_reset_phy, .set_link_led = (set_link_led_t)bnx2x_848xx_set_link_led, - .phy_specific_func = (phy_specific_func_t)NULL + .phy_specific_func = (phy_specific_func_t)bnx2x_848xx_specific_func }; static struct bnx2x_phy phy_54618se = { @@ -11616,7 +11354,7 @@ static struct bnx2x_phy phy_54618se = { .format_fw_ver = (format_fw_ver_t)NULL, .hw_reset = (hw_reset_t)NULL, .set_link_led = (set_link_led_t)bnx2x_5461x_set_link_led, - .phy_specific_func = (phy_specific_func_t)NULL + .phy_specific_func = (phy_specific_func_t)bnx2x_54618se_specific_func }; /*****************************************************************/ /* */ @@ -11862,6 +11600,8 @@ static int bnx2x_populate_ext_phy(struct bnx2x *bp, case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54616: case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE: *phy = phy_54618se; + if (phy_type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BCM54618SE) + phy->flags |= FLAGS_EEE; break; case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101: *phy = phy_7101; @@ -12141,7 +11881,7 @@ void bnx2x_init_bmac_loopback(struct link_params *params, bnx2x_xgxs_deassert(params); /* set bmac loopback */ - bnx2x_bmac_enable(params, vars, 1); + bnx2x_bmac_enable(params, vars, 1, 1); REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); } @@ -12233,7 +11973,7 @@ void bnx2x_init_xgxs_loopback(struct link_params *params, if (USES_WARPCORE(bp)) bnx2x_xmac_enable(params, vars, 0); else - bnx2x_bmac_enable(params, vars, 0); + bnx2x_bmac_enable(params, vars, 0, 1); } if (params->loopback_mode == LOOPBACK_XGXS) { @@ -12258,8 +11998,161 @@ void bnx2x_init_xgxs_loopback(struct link_params *params, bnx2x_set_led(params, vars, LED_MODE_OPER, vars->line_speed); } +static void bnx2x_set_rx_filter(struct link_params *params, u8 en) +{ + struct bnx2x *bp = params->bp; + u8 val = en * 0x1F; + + /* Open the gate between the NIG to the BRB */ + if (!CHIP_IS_E1x(bp)) + val |= en * 0x20; + REG_WR(bp, NIG_REG_LLH0_BRB1_DRV_MASK + params->port*4, val); + + if (!CHIP_IS_E1(bp)) { + REG_WR(bp, NIG_REG_LLH0_BRB1_DRV_MASK_MF + params->port*4, + en*0x3); + } + + REG_WR(bp, (params->port ? NIG_REG_LLH1_BRB1_NOT_MCP : + NIG_REG_LLH0_BRB1_NOT_MCP), en); +} +static int bnx2x_avoid_link_flap(struct link_params *params, + struct link_vars *vars) +{ + u32 phy_idx; + u32 dont_clear_stat, lfa_sts; + struct bnx2x *bp = params->bp; + + /* Sync the link parameters */ + bnx2x_link_status_update(params, vars); + + /* + * The module verification was already done by previous link owner, + * so this call is meant only to get warning message + */ + + for (phy_idx = INT_PHY; phy_idx < params->num_phys; phy_idx++) { + struct bnx2x_phy *phy = ¶ms->phy[phy_idx]; + if (phy->phy_specific_func) { + DP(NETIF_MSG_LINK, "Calling PHY specific func\n"); + phy->phy_specific_func(phy, params, PHY_INIT); + } + if ((phy->media_type == ETH_PHY_SFPP_10G_FIBER) || + (phy->media_type == ETH_PHY_SFP_1G_FIBER) || + (phy->media_type == ETH_PHY_DA_TWINAX)) + bnx2x_verify_sfp_module(phy, params); + } + lfa_sts = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, + lfa_sts)); + + dont_clear_stat = lfa_sts & SHMEM_LFA_DONT_CLEAR_STAT; + + /* Re-enable the NIG/MAC */ + if (CHIP_IS_E3(bp)) { + if (!dont_clear_stat) { + REG_WR(bp, GRCBASE_MISC + + MISC_REGISTERS_RESET_REG_2_CLEAR, + (MISC_REGISTERS_RESET_REG_2_MSTAT0 << + params->port)); + REG_WR(bp, GRCBASE_MISC + + MISC_REGISTERS_RESET_REG_2_SET, + (MISC_REGISTERS_RESET_REG_2_MSTAT0 << + params->port)); + } + if (vars->line_speed < SPEED_10000) + bnx2x_umac_enable(params, vars, 0); + else + bnx2x_xmac_enable(params, vars, 0); + } else { + if (vars->line_speed < SPEED_10000) + bnx2x_emac_enable(params, vars, 0); + else + bnx2x_bmac_enable(params, vars, 0, !dont_clear_stat); + } + + /* Increment LFA count */ + lfa_sts = ((lfa_sts & ~LINK_FLAP_AVOIDANCE_COUNT_MASK) | + (((((lfa_sts & LINK_FLAP_AVOIDANCE_COUNT_MASK) >> + LINK_FLAP_AVOIDANCE_COUNT_OFFSET) + 1) & 0xff) + << LINK_FLAP_AVOIDANCE_COUNT_OFFSET)); + /* Clear link flap reason */ + lfa_sts &= ~LFA_LINK_FLAP_REASON_MASK; + + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, lfa_sts), lfa_sts); + + /* Disable NIG DRAIN */ + REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); + + /* Enable interrupts */ + bnx2x_link_int_enable(params); + return 0; +} + +static void bnx2x_cannot_avoid_link_flap(struct link_params *params, + struct link_vars *vars, + int lfa_status) +{ + u32 lfa_sts, cfg_idx, tmp_val; + struct bnx2x *bp = params->bp; + + bnx2x_link_reset(params, vars, 1); + + if (!params->lfa_base) + return; + /* Store the new link parameters */ + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_duplex), + params->req_duplex[0] | (params->req_duplex[1] << 16)); + + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_flow_ctrl), + params->req_flow_ctrl[0] | (params->req_flow_ctrl[1] << 16)); + + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, req_line_speed), + params->req_line_speed[0] | (params->req_line_speed[1] << 16)); + + for (cfg_idx = 0; cfg_idx < SHMEM_LINK_CONFIG_SIZE; cfg_idx++) { + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, + speed_cap_mask[cfg_idx]), + params->speed_cap_mask[cfg_idx]); + } + + tmp_val = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, additional_config)); + tmp_val &= ~REQ_FC_AUTO_ADV_MASK; + tmp_val |= params->req_fc_auto_adv; + + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, additional_config), tmp_val); + + lfa_sts = REG_RD(bp, params->lfa_base + + offsetof(struct shmem_lfa, lfa_sts)); + + /* Clear the "Don't Clear Statistics" bit, and set reason */ + lfa_sts &= ~SHMEM_LFA_DONT_CLEAR_STAT; + + /* Set link flap reason */ + lfa_sts &= ~LFA_LINK_FLAP_REASON_MASK; + lfa_sts |= ((lfa_status & LFA_LINK_FLAP_REASON_MASK) << + LFA_LINK_FLAP_REASON_OFFSET); + + /* Increment link flap counter */ + lfa_sts = ((lfa_sts & ~LINK_FLAP_COUNT_MASK) | + (((((lfa_sts & LINK_FLAP_COUNT_MASK) >> + LINK_FLAP_COUNT_OFFSET) + 1) & 0xff) + << LINK_FLAP_COUNT_OFFSET)); + REG_WR(bp, params->lfa_base + + offsetof(struct shmem_lfa, lfa_sts), lfa_sts); + /* Proceed with regular link initialization */ +} + int bnx2x_phy_init(struct link_params *params, struct link_vars *vars) { + int lfa_status; struct bnx2x *bp = params->bp; DP(NETIF_MSG_LINK, "Phy Initialization started\n"); DP(NETIF_MSG_LINK, "(1) req_speed %d, req_flowctrl %d\n", @@ -12274,6 +12167,19 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars) vars->flow_ctrl = BNX2X_FLOW_CTRL_NONE; vars->mac_type = MAC_TYPE_NONE; vars->phy_flags = 0; + /* Driver opens NIG-BRB filters */ + bnx2x_set_rx_filter(params, 1); + /* Check if link flap can be avoided */ + lfa_status = bnx2x_check_lfa(params); + + if (lfa_status == 0) { + DP(NETIF_MSG_LINK, "Link Flap Avoidance in progress\n"); + return bnx2x_avoid_link_flap(params, vars); + } + + DP(NETIF_MSG_LINK, "Cannot avoid link flap lfa_sta=0x%x\n", + lfa_status); + bnx2x_cannot_avoid_link_flap(params, vars, lfa_status); /* Disable attentions */ bnx2x_bits_dis(bp, NIG_REG_MASK_INTERRUPT_PORT0 + params->port*4, @@ -12356,13 +12262,12 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, REG_WR(bp, NIG_REG_EGRESS_EMAC0_OUT_EN + port*4, 0); } - /* Stop BigMac rx */ - if (!CHIP_IS_E3(bp)) - bnx2x_bmac_rx_disable(bp, port); - else { - bnx2x_xmac_disable(params); - bnx2x_umac_disable(params); - } + if (!CHIP_IS_E3(bp)) { + bnx2x_set_bmac_rx(bp, params->chip_id, port, 0); + } else { + bnx2x_set_xmac_rxtx(params, 0); + bnx2x_set_umac_rxtx(params, 0); + } /* Disable emac */ if (!CHIP_IS_E3(bp)) REG_WR(bp, NIG_REG_NIG_EMAC0_EN + port*4, 0); @@ -12420,6 +12325,56 @@ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, vars->phy_flags = 0; return 0; } +int bnx2x_lfa_reset(struct link_params *params, + struct link_vars *vars) +{ + struct bnx2x *bp = params->bp; + vars->link_up = 0; + vars->phy_flags = 0; + if (!params->lfa_base) + return bnx2x_link_reset(params, vars, 1); + /* + * Activate NIG drain so that during this time the device won't send + * anything while it is unable to response. + */ + REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 1); + + /* + * Close gracefully the gate from BMAC to NIG such that no half packets + * are passed. + */ + if (!CHIP_IS_E3(bp)) + bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 0); + + if (CHIP_IS_E3(bp)) { + bnx2x_set_xmac_rxtx(params, 0); + bnx2x_set_umac_rxtx(params, 0); + } + /* Wait 10ms for the pipe to clean up*/ + usleep_range(10000, 20000); + + /* Clean the NIG-BRB using the network filters in a way that will + * not cut a packet in the middle. + */ + bnx2x_set_rx_filter(params, 0); + + /* + * Re-open the gate between the BMAC and the NIG, after verifying the + * gate to the BRB is closed, otherwise packets may arrive to the + * firmware before driver had initialized it. The target is to achieve + * minimum management protocol down time. + */ + if (!CHIP_IS_E3(bp)) + bnx2x_set_bmac_rx(bp, params->chip_id, params->port, 1); + + if (CHIP_IS_E3(bp)) { + bnx2x_set_xmac_rxtx(params, 1); + bnx2x_set_umac_rxtx(params, 1); + } + /* Disable NIG drain */ + REG_WR(bp, NIG_REG_EGRESS_DRAIN0_MODE + params->port*4, 0); + return 0; +} /****************************************************************************/ /* Common function */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h index 51cac813005..9165b89a4b1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.h @@ -155,7 +155,7 @@ struct bnx2x_phy { #define FLAGS_DUMMY_READ (1<<9) #define FLAGS_MDC_MDIO_WA_B0 (1<<10) #define FLAGS_TX_ERROR_CHECK (1<<12) -#define FLAGS_EEE_10GBT (1<<13) +#define FLAGS_EEE (1<<13) /* preemphasis values for the rx side */ u16 rx_preemphasis[4]; @@ -216,6 +216,7 @@ struct bnx2x_phy { phy_specific_func_t phy_specific_func; #define DISABLE_TX 1 #define ENABLE_TX 2 +#define PHY_INIT 3 }; /* Inputs parameters to the CLC */ @@ -304,6 +305,8 @@ struct link_params { struct bnx2x *bp; u16 req_fc_auto_adv; /* Should be set to TX / BOTH when req_flow_ctrl is set to AUTO */ + u16 rsrv1; + u32 lfa_base; }; /* Output parameters */ @@ -356,7 +359,7 @@ int bnx2x_phy_init(struct link_params *params, struct link_vars *vars); to 0 */ int bnx2x_link_reset(struct link_params *params, struct link_vars *vars, u8 reset_ext_phy); - +int bnx2x_lfa_reset(struct link_params *params, struct link_vars *vars); /* bnx2x_link_update should be called upon link interrupt */ int bnx2x_link_update(struct link_params *params, struct link_vars *vars); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 0875ecfe337..f7ed122f407 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -1162,14 +1162,9 @@ static int bnx2x_send_final_clnup(struct bnx2x *bp, u8 clnup_func, static u8 bnx2x_is_pcie_pending(struct pci_dev *dev) { - int pos; u16 status; - pos = pci_pcie_cap(dev); - if (!pos) - return false; - - pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &status); + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &status); return status & PCI_EXP_DEVSTA_TRPND; } @@ -2171,7 +2166,6 @@ void bnx2x_link_set(struct bnx2x *bp) { if (!BP_NOMCP(bp)) { bnx2x_acquire_phy_lock(bp); - bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1); bnx2x_phy_init(&bp->link_params, &bp->link_vars); bnx2x_release_phy_lock(bp); @@ -2184,12 +2178,19 @@ static void bnx2x__link_reset(struct bnx2x *bp) { if (!BP_NOMCP(bp)) { bnx2x_acquire_phy_lock(bp); - bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1); + bnx2x_lfa_reset(&bp->link_params, &bp->link_vars); bnx2x_release_phy_lock(bp); } else BNX2X_ERR("Bootcode is missing - can not reset link\n"); } +void bnx2x_force_link_reset(struct bnx2x *bp) +{ + bnx2x_acquire_phy_lock(bp); + bnx2x_link_reset(&bp->link_params, &bp->link_vars, 1); + bnx2x_release_phy_lock(bp); +} + u8 bnx2x_link_test(struct bnx2x *bp, u8 is_serdes) { u8 rc = 0; @@ -6135,8 +6136,7 @@ static void bnx2x_init_pxp(struct bnx2x *bp) u16 devctl; int r_order, w_order; - pci_read_config_word(bp->pdev, - pci_pcie_cap(bp->pdev) + PCI_EXP_DEVCTL, &devctl); + pcie_capability_read_word(bp->pdev, PCI_EXP_DEVCTL, &devctl); DP(NETIF_MSG_HW, "read 0x%x from devctl\n", devctl); w_order = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); if (bp->mrrs == -1) @@ -6757,7 +6757,6 @@ static int bnx2x_init_hw_port(struct bnx2x *bp) u32 low, high; u32 val; - bnx2x__link_reset(bp); DP(NETIF_MSG_HW, "starting port init port %d\n", port); @@ -8250,12 +8249,15 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode) * bnx2x_send_unload_done - send UNLOAD_DONE command to the MCP. * * @bp: driver handle + * @keep_link: true iff link should be kept up */ -void bnx2x_send_unload_done(struct bnx2x *bp) +void bnx2x_send_unload_done(struct bnx2x *bp, bool keep_link) { + u32 reset_param = keep_link ? DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET : 0; + /* Report UNLOAD_DONE to MCP */ if (!BP_NOMCP(bp)) - bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0); + bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, reset_param); } static int bnx2x_func_wait_started(struct bnx2x *bp) @@ -8324,7 +8326,7 @@ static int bnx2x_func_wait_started(struct bnx2x *bp) return 0; } -void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode) +void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) { int port = BP_PORT(bp); int i, rc = 0; @@ -8446,7 +8448,7 @@ unload_error: /* Report UNLOAD_DONE to MCP */ - bnx2x_send_unload_done(bp); + bnx2x_send_unload_done(bp, keep_link); } void bnx2x_disable_close_the_gate(struct bnx2x *bp) @@ -8858,7 +8860,8 @@ int bnx2x_leader_reset(struct bnx2x *bp) * driver is owner of the HW */ if (!global && !BP_NOMCP(bp)) { - load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, 0); + load_code = bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_REQ, + DRV_MSG_CODE_LOAD_REQ_WITH_LFA); if (!load_code) { BNX2X_ERR("MCP response failure, aborting\n"); rc = -EAGAIN; @@ -8964,7 +8967,7 @@ static void bnx2x_parity_recover(struct bnx2x *bp) /* Stop the driver */ /* If interface has been removed - break */ - if (bnx2x_nic_unload(bp, UNLOAD_RECOVERY)) + if (bnx2x_nic_unload(bp, UNLOAD_RECOVERY, false)) return; bp->recovery_state = BNX2X_RECOVERY_WAIT; @@ -9130,7 +9133,7 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work) bp->sp_rtnl_state = 0; smp_mb(); - bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); bnx2x_nic_load(bp, LOAD_NORMAL); goto sp_rtnl_exit; @@ -9316,7 +9319,8 @@ static void __devinit bnx2x_prev_unload_undi_inc(struct bnx2x *bp, u8 port, static int __devinit bnx2x_prev_mcp_done(struct bnx2x *bp) { - u32 rc = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, 0); + u32 rc = bnx2x_fw_command(bp, DRV_MSG_CODE_UNLOAD_DONE, + DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET); if (!rc) { BNX2X_ERR("MCP response failure, aborting\n"); return -EBUSY; @@ -9380,7 +9384,7 @@ static int __devinit bnx2x_prev_mark_path(struct bnx2x *bp) static int __devinit bnx2x_do_flr(struct bnx2x *bp) { - int i, pos; + int i; u16 status; struct pci_dev *dev = bp->pdev; @@ -9397,16 +9401,12 @@ static int __devinit bnx2x_do_flr(struct bnx2x *bp) return -EINVAL; } - pos = pci_pcie_cap(dev); - if (!pos) - return -ENOTTY; - /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) msleep((1 << (i - 1)) * 100); - pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &status); + pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &status); if (!(status & PCI_EXP_DEVSTA_TRPND)) goto clear; } @@ -11010,7 +11010,7 @@ static int bnx2x_close(struct net_device *dev) struct bnx2x *bp = netdev_priv(dev); /* Unload the driver, release IRQs */ - bnx2x_nic_unload(bp, UNLOAD_CLOSE); + bnx2x_nic_unload(bp, UNLOAD_CLOSE, false); /* Power off */ bnx2x_set_power_state(bp, PCI_D3hot); @@ -12167,7 +12167,7 @@ static void bnx2x_io_resume(struct pci_dev *pdev) rtnl_unlock(); } -static struct pci_error_handlers bnx2x_err_handler = { +static const struct pci_error_handlers bnx2x_err_handler = { .error_detected = bnx2x_io_error_detected, .slot_reset = bnx2x_io_slot_reset, .resume = bnx2x_io_resume, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index 28a0bcfe61f..1b1999d34c7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -4949,6 +4949,10 @@ #define UMAC_COMMAND_CONFIG_REG_SW_RESET (0x1<<13) #define UMAC_COMMAND_CONFIG_REG_TX_ENA (0x1<<0) #define UMAC_REG_COMMAND_CONFIG 0x8 +/* [RW 16] This is the duration for which MAC must wait to go back to ACTIVE + * state from LPI state when it receives packet for transmission. The + * decrement unit is 1 micro-second. */ +#define UMAC_REG_EEE_WAKE_TIMER 0x6c /* [RW 32] Register Bit 0 refers to Bit 16 of the MAC address; Bit 1 refers * to bit 17 of the MAC address etc. */ #define UMAC_REG_MAC_ADDR0 0xc @@ -4958,6 +4962,8 @@ /* [RW 14] Defines a 14-Bit maximum frame length used by the MAC receive * logic to check frames. */ #define UMAC_REG_MAXFR 0x14 +#define UMAC_REG_UMAC_EEE_CTRL 0x64 +#define UMAC_UMAC_EEE_CTRL_REG_EEE_EN (0x1<<3) /* [RW 8] The event id for aggregated interrupt 0 */ #define USDM_REG_AGG_INT_EVENT_0 0xc4038 #define USDM_REG_AGG_INT_EVENT_1 0xc403c @@ -6992,6 +6998,7 @@ Theotherbitsarereservedandshouldbezero*/ /* BCM84833 only */ #define MDIO_84833_TOP_CFG_FW_REV 0x400f #define MDIO_84833_TOP_CFG_FW_EEE 0x10b1 +#define MDIO_84833_TOP_CFG_FW_NO_EEE 0x1f81 #define MDIO_84833_TOP_CFG_XGPHY_STRAP1 0x401a #define MDIO_84833_SUPER_ISOLATE 0x8000 /* These are mailbox register set used by 84833. */ @@ -7160,10 +7167,11 @@ Theotherbitsarereservedandshouldbezero*/ #define MDIO_REG_GPHY_ID_54618SE 0x5cd5 #define MDIO_REG_GPHY_CL45_ADDR_REG 0xd #define MDIO_REG_GPHY_CL45_DATA_REG 0xe -#define MDIO_REG_GPHY_EEE_ADV 0x3c -#define MDIO_REG_GPHY_EEE_1G (0x1 << 2) -#define MDIO_REG_GPHY_EEE_100 (0x1 << 1) #define MDIO_REG_GPHY_EEE_RESOLVED 0x803e +#define MDIO_REG_GPHY_EXP_ACCESS_GATE 0x15 +#define MDIO_REG_GPHY_EXP_ACCESS 0x17 +#define MDIO_REG_GPHY_EXP_ACCESS_TOP 0xd00 +#define MDIO_REG_GPHY_EXP_TOP_2K_BUF 0x40 #define MDIO_REG_GPHY_AUX_STATUS 0x19 #define MDIO_REG_INTR_STATUS 0x1a #define MDIO_REG_INTR_MASK 0x1b diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 62f754bd0df..71971a161bd 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -229,8 +229,7 @@ static inline int bnx2x_exe_queue_step(struct bnx2x *bp, */ list_add_tail(&spacer.link, &o->pending_comp); mb(); - list_del(&elem->link); - list_add_tail(&elem->link, &o->pending_comp); + list_move_tail(&elem->link, &o->pending_comp); list_del(&spacer.link); } else break; @@ -5620,7 +5619,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp, memset(rdata, 0, sizeof(*rdata)); /* Fill the ramrod data with provided parameters */ - rdata->function_mode = cpu_to_le16(start_params->mf_mode); + rdata->function_mode = (u8)start_params->mf_mode; rdata->sd_vlan_tag = cpu_to_le16(start_params->sd_vlan_tag); rdata->path_id = BP_PATH(bp); rdata->network_cos_mode = start_params->network_cos_mode; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index f83e033da6d..acf2fe4ca60 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -1321,7 +1321,7 @@ void bnx2x_init_mcast_obj(struct bnx2x *bp, * the current command will be enqueued to the tail of the * pending commands list. * - * Return: 0 is operation was sucessfull and there are no pending completions, + * Return: 0 is operation was successfull and there are no pending completions, * negative if there were errors, positive if there are pending * completions. */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index a1d0446b39b..348ed02d3c6 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -39,14 +39,39 @@ static inline long bnx2x_hilo(u32 *hiref) #endif } -static u16 bnx2x_get_port_stats_dma_len(struct bnx2x *bp) +static inline u16 bnx2x_get_port_stats_dma_len(struct bnx2x *bp) { - u16 res = sizeof(struct host_port_stats) >> 2; + u16 res = 0; - /* if PFC stats are not supported by the MFW, don't DMA them */ - if (!(bp->flags & BC_SUPPORTS_PFC_STATS)) - res -= (sizeof(u32)*4) >> 2; + /* 'newest' convention - shmem2 cotains the size of the port stats */ + if (SHMEM2_HAS(bp, sizeof_port_stats)) { + u32 size = SHMEM2_RD(bp, sizeof_port_stats); + if (size) + res = size; + /* prevent newer BC from causing buffer overflow */ + if (res > sizeof(struct host_port_stats)) + res = sizeof(struct host_port_stats); + } + + /* Older convention - all BCs support the port stats' fields up until + * the 'not_used' field + */ + if (!res) { + res = offsetof(struct host_port_stats, not_used) + 4; + + /* if PFC stats are supported by the MFW, DMA them as well */ + if (bp->flags & BC_SUPPORTS_PFC_STATS) { + res += offsetof(struct host_port_stats, + pfc_frames_rx_lo) - + offsetof(struct host_port_stats, + pfc_frames_tx_hi) + 4 ; + } + } + + res >>= 2; + + WARN_ON(res > 2 * DMAE_LEN32_RD_MAX); return res; } diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 3b4fc61f24c..cc8434fd606 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -823,10 +823,8 @@ static void cnic_free_context(struct cnic_dev *dev) } } -static void __cnic_free_uio(struct cnic_uio_dev *udev) +static void __cnic_free_uio_rings(struct cnic_uio_dev *udev) { - uio_unregister_device(&udev->cnic_uinfo); - if (udev->l2_buf) { dma_free_coherent(&udev->pdev->dev, udev->l2_buf_size, udev->l2_buf, udev->l2_buf_map); @@ -839,6 +837,14 @@ static void __cnic_free_uio(struct cnic_uio_dev *udev) udev->l2_ring = NULL; } +} + +static void __cnic_free_uio(struct cnic_uio_dev *udev) +{ + uio_unregister_device(&udev->cnic_uinfo); + + __cnic_free_uio_rings(udev); + pci_dev_put(udev->pdev); kfree(udev); } @@ -862,6 +868,8 @@ static void cnic_free_resc(struct cnic_dev *dev) if (udev) { udev->dev = NULL; cp->udev = NULL; + if (udev->uio_dev == -1) + __cnic_free_uio_rings(udev); } cnic_free_context(dev); @@ -996,6 +1004,34 @@ static int cnic_alloc_kcq(struct cnic_dev *dev, struct kcq_info *info, return 0; } +static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages) +{ + struct cnic_local *cp = udev->dev->cnic_priv; + + if (udev->l2_ring) + return 0; + + udev->l2_ring_size = pages * BCM_PAGE_SIZE; + udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size, + &udev->l2_ring_map, + GFP_KERNEL | __GFP_COMP); + if (!udev->l2_ring) + return -ENOMEM; + + udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; + udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size); + udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size, + &udev->l2_buf_map, + GFP_KERNEL | __GFP_COMP); + if (!udev->l2_buf) { + __cnic_free_uio_rings(udev); + return -ENOMEM; + } + + return 0; + +} + static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) { struct cnic_local *cp = dev->cnic_priv; @@ -1005,6 +1041,11 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) list_for_each_entry(udev, &cnic_udev_list, list) { if (udev->pdev == dev->pcidev) { udev->dev = dev; + if (__cnic_alloc_uio_rings(udev, pages)) { + udev->dev = NULL; + read_unlock(&cnic_dev_lock); + return -ENOMEM; + } cp->udev = udev; read_unlock(&cnic_dev_lock); return 0; @@ -1020,20 +1061,9 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) udev->dev = dev; udev->pdev = dev->pcidev; - udev->l2_ring_size = pages * BCM_PAGE_SIZE; - udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size, - &udev->l2_ring_map, - GFP_KERNEL | __GFP_COMP); - if (!udev->l2_ring) - goto err_udev; - udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; - udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size); - udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size, - &udev->l2_buf_map, - GFP_KERNEL | __GFP_COMP); - if (!udev->l2_buf) - goto err_dma; + if (__cnic_alloc_uio_rings(udev, pages)) + goto err_udev; write_lock(&cnic_dev_lock); list_add(&udev->list, &cnic_udev_list); @@ -1044,9 +1074,7 @@ static int cnic_alloc_uio_rings(struct cnic_dev *dev, int pages) cp->udev = udev; return 0; - err_dma: - dma_free_coherent(&udev->pdev->dev, udev->l2_ring_size, - udev->l2_ring, udev->l2_ring_map); + err_udev: kfree(udev); return -ENOMEM; @@ -1260,7 +1288,7 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev) if (ret) goto error; - if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) { + if (CNIC_SUPPORTS_FCOE(cp)) { ret = cnic_alloc_kcq(dev, &cp->kcq2, true); if (ret) goto error; @@ -1275,6 +1303,9 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev) if (ret) goto error; + if (cp->ethdev->drv_state & CNIC_DRV_STATE_NO_ISCSI) + return 0; + cp->bnx2x_def_status_blk = cp->ethdev->irq_arr[1].status_blk; cp->l2_rx_ring_size = 15; @@ -3050,6 +3081,22 @@ static void cnic_ack_bnx2x_e2_msix(struct cnic_dev *dev) IGU_INT_DISABLE, 0); } +static void cnic_arm_bnx2x_msix(struct cnic_dev *dev, u32 idx) +{ + struct cnic_local *cp = dev->cnic_priv; + + cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, CSTORM_ID, idx, + IGU_INT_ENABLE, 1); +} + +static void cnic_arm_bnx2x_e2_msix(struct cnic_dev *dev, u32 idx) +{ + struct cnic_local *cp = dev->cnic_priv; + + cnic_ack_igu_sb(dev, cp->bnx2x_igu_sb_id, IGU_SEG_ACCESS_DEF, idx, + IGU_INT_ENABLE, 1); +} + static u32 cnic_service_bnx2x_kcq(struct cnic_dev *dev, struct kcq_info *info) { u32 last_status = *info->status_idx_ptr; @@ -3086,9 +3133,8 @@ static void cnic_service_bnx2x_bh(unsigned long data) CNIC_WR16(dev, cp->kcq1.io_addr, cp->kcq1.sw_prod_idx + MAX_KCQ_IDX); - if (!BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) { - cnic_ack_bnx2x_int(dev, cp->bnx2x_igu_sb_id, USTORM_ID, - status_idx, IGU_INT_ENABLE, 1); + if (cp->ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE) { + cp->arm_int(dev, status_idx); break; } @@ -4845,6 +4891,9 @@ static void cnic_init_bnx2x_tx_ring(struct cnic_dev *dev, buf_map = udev->l2_buf_map; for (i = 0; i < MAX_TX_DESC_CNT; i += 3, txbd += 3) { struct eth_tx_start_bd *start_bd = &txbd->start_bd; + struct eth_tx_parse_bd_e1x *pbd_e1x = + &((txbd + 1)->parse_bd_e1x); + struct eth_tx_parse_bd_e2 *pbd_e2 = &((txbd + 1)->parse_bd_e2); struct eth_tx_bd *reg_bd = &((txbd + 2)->reg_bd); start_bd->addr_hi = cpu_to_le32((u64) buf_map >> 32); @@ -4854,10 +4903,15 @@ static void cnic_init_bnx2x_tx_ring(struct cnic_dev *dev, start_bd->nbytes = cpu_to_le16(0x10); start_bd->nbd = cpu_to_le16(3); start_bd->bd_flags.as_bitfield = ETH_TX_BD_FLAGS_START_BD; - start_bd->general_data = (UNICAST_ADDRESS << - ETH_TX_START_BD_ETH_ADDR_TYPE_SHIFT); + start_bd->general_data &= ~ETH_TX_START_BD_PARSE_NBDS; start_bd->general_data |= (1 << ETH_TX_START_BD_HDR_NBDS_SHIFT); + if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) + pbd_e2->parsing_data = (UNICAST_ADDRESS << + ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE_SHIFT); + else + pbd_e1x->global_data = (UNICAST_ADDRESS << + ETH_TX_PARSE_BD_E1X_ETH_ADDR_TYPE_SHIFT); } val = (u64) ring_map >> 32; @@ -5308,7 +5362,7 @@ static void cnic_stop_hw(struct cnic_dev *dev) /* Need to wait for the ring shutdown event to complete * before clearing the CNIC_UP flag. */ - while (cp->udev->uio_dev != -1 && i < 15) { + while (cp->udev && cp->udev->uio_dev != -1 && i < 15) { msleep(100); i++; } @@ -5473,8 +5527,7 @@ static struct cnic_dev *init_bnx2x_cnic(struct net_device *dev) if (!(ethdev->drv_state & CNIC_DRV_STATE_NO_ISCSI)) cdev->max_iscsi_conn = ethdev->max_iscsi_conn; - if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id) && - !(ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE)) + if (CNIC_SUPPORTS_FCOE(cp)) cdev->max_fcoe_conn = ethdev->max_fcoe_conn; if (cdev->max_fcoe_conn > BNX2X_FCOE_NUM_CONNECTIONS) @@ -5492,10 +5545,13 @@ static struct cnic_dev *init_bnx2x_cnic(struct net_device *dev) cp->stop_cm = cnic_cm_stop_bnx2x_hw; cp->enable_int = cnic_enable_bnx2x_int; cp->disable_int_sync = cnic_disable_bnx2x_int_sync; - if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) + if (BNX2X_CHIP_IS_E2_PLUS(cp->chip_id)) { cp->ack_int = cnic_ack_bnx2x_e2_msix; - else + cp->arm_int = cnic_arm_bnx2x_e2_msix; + } else { cp->ack_int = cnic_ack_bnx2x_msix; + cp->arm_int = cnic_arm_bnx2x_msix; + } cp->close_conn = cnic_close_bnx2x_conn; return cdev; } diff --git a/drivers/net/ethernet/broadcom/cnic.h b/drivers/net/ethernet/broadcom/cnic.h index 30328097f51..148604c3fa0 100644 --- a/drivers/net/ethernet/broadcom/cnic.h +++ b/drivers/net/ethernet/broadcom/cnic.h @@ -334,6 +334,7 @@ struct cnic_local { void (*enable_int)(struct cnic_dev *); void (*disable_int_sync)(struct cnic_dev *); void (*ack_int)(struct cnic_dev *); + void (*arm_int)(struct cnic_dev *, u32 index); void (*close_conn)(struct cnic_sock *, u32 opcode); }; @@ -474,6 +475,10 @@ struct bnx2x_bd_chain_next { MAX_STAT_COUNTER_ID_E1)) #endif +#define CNIC_SUPPORTS_FCOE(cp) \ + (BNX2X_CHIP_IS_E2_PLUS((cp)->chip_id) && \ + !((cp)->ethdev->drv_state & CNIC_DRV_STATE_NO_FCOE)) + #define CNIC_RAMROD_TMO (HZ / 4) #endif diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h index 382c98b0cc0..ede3db35d75 100644 --- a/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/drivers/net/ethernet/broadcom/cnic_defs.h @@ -896,7 +896,7 @@ struct tstorm_tcp_tcp_ag_context_section { u32 snd_nxt; u32 rtt_seq; u32 rtt_time; - u32 __reserved66; + u32 wnd_right_edge_local; u32 wnd_right_edge; u32 tcp_agg_vars1; #define TSTORM_TCP_TCP_AG_CONTEXT_SECTION_FIN_SENT_FLAG (0x1<<0) diff --git a/drivers/net/ethernet/broadcom/cnic_if.h b/drivers/net/ethernet/broadcom/cnic_if.h index 5cb88881bba..865095aad1f 100644 --- a/drivers/net/ethernet/broadcom/cnic_if.h +++ b/drivers/net/ethernet/broadcom/cnic_if.h @@ -14,8 +14,8 @@ #include "bnx2x/bnx2x_mfw_req.h" -#define CNIC_MODULE_VERSION "2.5.12" -#define CNIC_MODULE_RELDATE "June 29, 2012" +#define CNIC_MODULE_VERSION "2.5.14" +#define CNIC_MODULE_RELDATE "Sep 30, 2012" #define CNIC_ULP_RDMA 0 #define CNIC_ULP_ISCSI 1 diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index bf906c51d82..46280ba4c5d 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -44,10 +44,8 @@ #include <linux/prefetch.h> #include <linux/dma-mapping.h> #include <linux/firmware.h> -#if IS_ENABLED(CONFIG_HWMON) #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> -#endif #include <net/checksum.h> #include <net/ip.h> @@ -92,10 +90,10 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) #define DRV_MODULE_NAME "tg3" #define TG3_MAJ_NUM 3 -#define TG3_MIN_NUM 124 +#define TG3_MIN_NUM 125 #define DRV_MODULE_VERSION \ __stringify(TG3_MAJ_NUM) "." __stringify(TG3_MIN_NUM) -#define DRV_MODULE_RELDATE "March 21, 2012" +#define DRV_MODULE_RELDATE "September 26, 2012" #define RESET_KIND_SHUTDOWN 0 #define RESET_KIND_INIT 1 @@ -3653,17 +3651,9 @@ static int tg3_power_down_prepare(struct tg3 *tp) tg3_enable_register_access(tp); /* Restore the CLKREQ setting. */ - if (tg3_flag(tp, CLKREQ_BUG)) { - u16 lnkctl; - - pci_read_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_LNKCTL, - &lnkctl); - lnkctl |= PCI_EXP_LNKCTL_CLKREQ_EN; - pci_write_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_LNKCTL, - lnkctl); - } + if (tg3_flag(tp, CLKREQ_BUG)) + pcie_capability_set_word(tp->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN); misc_host_ctrl = tr32(TG3PCI_MISC_HOST_CTRL); tw32(TG3PCI_MISC_HOST_CTRL, @@ -4434,20 +4424,13 @@ relink: /* Prevent send BD corruption. */ if (tg3_flag(tp, CLKREQ_BUG)) { - u16 oldlnkctl, newlnkctl; - - pci_read_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_LNKCTL, - &oldlnkctl); if (tp->link_config.active_speed == SPEED_100 || tp->link_config.active_speed == SPEED_10) - newlnkctl = oldlnkctl & ~PCI_EXP_LNKCTL_CLKREQ_EN; + pcie_capability_clear_word(tp->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN); else - newlnkctl = oldlnkctl | PCI_EXP_LNKCTL_CLKREQ_EN; - if (newlnkctl != oldlnkctl) - pci_write_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + - PCI_EXP_LNKCTL, newlnkctl); + pcie_capability_set_word(tp->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN); } if (current_link_up != netif_carrier_ok(tp->dev)) { @@ -6278,7 +6261,7 @@ static int tg3_poll_work(struct tg3_napi *tnapi, int work_done, int budget) u32 jmb_prod_idx = dpr->rx_jmb_prod_idx; tp->rx_refill = false; - for (i = 1; i < tp->irq_cnt; i++) + for (i = 1; i <= tp->rxq_cnt; i++) err |= tg3_rx_prodring_xfer(tp, dpr, &tp->napi[i].prodring); @@ -7607,15 +7590,11 @@ static int tg3_init_rings(struct tg3 *tp) return 0; } -/* - * Must not be invoked with interrupt sources disabled and - * the hardware shutdown down. - */ -static void tg3_free_consistent(struct tg3 *tp) +static void tg3_mem_tx_release(struct tg3 *tp) { int i; - for (i = 0; i < tp->irq_cnt; i++) { + for (i = 0; i < tp->irq_max; i++) { struct tg3_napi *tnapi = &tp->napi[i]; if (tnapi->tx_ring) { @@ -7626,17 +7605,114 @@ static void tg3_free_consistent(struct tg3 *tp) kfree(tnapi->tx_buffers); tnapi->tx_buffers = NULL; + } +} - if (tnapi->rx_rcb) { - dma_free_coherent(&tp->pdev->dev, - TG3_RX_RCB_RING_BYTES(tp), - tnapi->rx_rcb, - tnapi->rx_rcb_mapping); - tnapi->rx_rcb = NULL; - } +static int tg3_mem_tx_acquire(struct tg3 *tp) +{ + int i; + struct tg3_napi *tnapi = &tp->napi[0]; + + /* If multivector TSS is enabled, vector 0 does not handle + * tx interrupts. Don't allocate any resources for it. + */ + if (tg3_flag(tp, ENABLE_TSS)) + tnapi++; + + for (i = 0; i < tp->txq_cnt; i++, tnapi++) { + tnapi->tx_buffers = kzalloc(sizeof(struct tg3_tx_ring_info) * + TG3_TX_RING_SIZE, GFP_KERNEL); + if (!tnapi->tx_buffers) + goto err_out; + + tnapi->tx_ring = dma_alloc_coherent(&tp->pdev->dev, + TG3_TX_RING_BYTES, + &tnapi->tx_desc_mapping, + GFP_KERNEL); + if (!tnapi->tx_ring) + goto err_out; + } + + return 0; + +err_out: + tg3_mem_tx_release(tp); + return -ENOMEM; +} + +static void tg3_mem_rx_release(struct tg3 *tp) +{ + int i; + + for (i = 0; i < tp->irq_max; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; tg3_rx_prodring_fini(tp, &tnapi->prodring); + if (!tnapi->rx_rcb) + continue; + + dma_free_coherent(&tp->pdev->dev, + TG3_RX_RCB_RING_BYTES(tp), + tnapi->rx_rcb, + tnapi->rx_rcb_mapping); + tnapi->rx_rcb = NULL; + } +} + +static int tg3_mem_rx_acquire(struct tg3 *tp) +{ + unsigned int i, limit; + + limit = tp->rxq_cnt; + + /* If RSS is enabled, we need a (dummy) producer ring + * set on vector zero. This is the true hw prodring. + */ + if (tg3_flag(tp, ENABLE_RSS)) + limit++; + + for (i = 0; i < limit; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + + if (tg3_rx_prodring_init(tp, &tnapi->prodring)) + goto err_out; + + /* If multivector RSS is enabled, vector 0 + * does not handle rx or tx interrupts. + * Don't allocate any resources for it. + */ + if (!i && tg3_flag(tp, ENABLE_RSS)) + continue; + + tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev, + TG3_RX_RCB_RING_BYTES(tp), + &tnapi->rx_rcb_mapping, + GFP_KERNEL); + if (!tnapi->rx_rcb) + goto err_out; + + memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); + } + + return 0; + +err_out: + tg3_mem_rx_release(tp); + return -ENOMEM; +} + +/* + * Must not be invoked with interrupt sources disabled and + * the hardware shutdown down. + */ +static void tg3_free_consistent(struct tg3 *tp) +{ + int i; + + for (i = 0; i < tp->irq_cnt; i++) { + struct tg3_napi *tnapi = &tp->napi[i]; + if (tnapi->hw_status) { dma_free_coherent(&tp->pdev->dev, TG3_HW_STATUS_SIZE, tnapi->hw_status, @@ -7645,6 +7721,9 @@ static void tg3_free_consistent(struct tg3 *tp) } } + tg3_mem_rx_release(tp); + tg3_mem_tx_release(tp); + if (tp->hw_stats) { dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), tp->hw_stats, tp->stats_mapping); @@ -7683,72 +7762,38 @@ static int tg3_alloc_consistent(struct tg3 *tp) memset(tnapi->hw_status, 0, TG3_HW_STATUS_SIZE); sblk = tnapi->hw_status; - if (tg3_rx_prodring_init(tp, &tnapi->prodring)) - goto err_out; + if (tg3_flag(tp, ENABLE_RSS)) { + u16 *prodptr = 0; - /* If multivector TSS is enabled, vector 0 does not handle - * tx interrupts. Don't allocate any resources for it. - */ - if ((!i && !tg3_flag(tp, ENABLE_TSS)) || - (i && tg3_flag(tp, ENABLE_TSS))) { - tnapi->tx_buffers = kzalloc( - sizeof(struct tg3_tx_ring_info) * - TG3_TX_RING_SIZE, GFP_KERNEL); - if (!tnapi->tx_buffers) - goto err_out; - - tnapi->tx_ring = dma_alloc_coherent(&tp->pdev->dev, - TG3_TX_RING_BYTES, - &tnapi->tx_desc_mapping, - GFP_KERNEL); - if (!tnapi->tx_ring) - goto err_out; - } - - /* - * When RSS is enabled, the status block format changes - * slightly. The "rx_jumbo_consumer", "reserved", - * and "rx_mini_consumer" members get mapped to the - * other three rx return ring producer indexes. - */ - switch (i) { - default: - if (tg3_flag(tp, ENABLE_RSS)) { - tnapi->rx_rcb_prod_idx = NULL; + /* + * When RSS is enabled, the status block format changes + * slightly. The "rx_jumbo_consumer", "reserved", + * and "rx_mini_consumer" members get mapped to the + * other three rx return ring producer indexes. + */ + switch (i) { + case 1: + prodptr = &sblk->idx[0].rx_producer; + break; + case 2: + prodptr = &sblk->rx_jumbo_consumer; + break; + case 3: + prodptr = &sblk->reserved; + break; + case 4: + prodptr = &sblk->rx_mini_consumer; break; } - /* Fall through */ - case 1: + tnapi->rx_rcb_prod_idx = prodptr; + } else { tnapi->rx_rcb_prod_idx = &sblk->idx[0].rx_producer; - break; - case 2: - tnapi->rx_rcb_prod_idx = &sblk->rx_jumbo_consumer; - break; - case 3: - tnapi->rx_rcb_prod_idx = &sblk->reserved; - break; - case 4: - tnapi->rx_rcb_prod_idx = &sblk->rx_mini_consumer; - break; } - - /* - * If multivector RSS is enabled, vector 0 does not handle - * rx or tx interrupts. Don't allocate any resources for it. - */ - if (!i && tg3_flag(tp, ENABLE_RSS)) - continue; - - tnapi->rx_rcb = dma_alloc_coherent(&tp->pdev->dev, - TG3_RX_RCB_RING_BYTES(tp), - &tnapi->rx_rcb_mapping, - GFP_KERNEL); - if (!tnapi->rx_rcb) - goto err_out; - - memset(tnapi->rx_rcb, 0, TG3_RX_RCB_RING_BYTES(tp)); } + if (tg3_mem_tx_acquire(tp) || tg3_mem_rx_acquire(tp)) + goto err_out; + return 0; err_out: @@ -8054,7 +8099,7 @@ static int tg3_chip_reset(struct tg3 *tp) udelay(120); - if (tg3_flag(tp, PCI_EXPRESS) && pci_pcie_cap(tp->pdev)) { + if (tg3_flag(tp, PCI_EXPRESS) && pci_is_pcie(tp->pdev)) { u16 val16; if (tp->pci_chip_rev_id == CHIPREV_ID_5750_A0) { @@ -8071,24 +8116,17 @@ static int tg3_chip_reset(struct tg3 *tp) } /* Clear the "no snoop" and "relaxed ordering" bits. */ - pci_read_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_DEVCTL, - &val16); - val16 &= ~(PCI_EXP_DEVCTL_RELAX_EN | - PCI_EXP_DEVCTL_NOSNOOP_EN); + val16 = PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; /* * Older PCIe devices only support the 128 byte * MPS setting. Enforce the restriction. */ if (!tg3_flag(tp, CPMU_PRESENT)) - val16 &= ~PCI_EXP_DEVCTL_PAYLOAD; - pci_write_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_DEVCTL, - val16); + val16 |= PCI_EXP_DEVCTL_PAYLOAD; + pcie_capability_clear_word(tp->pdev, PCI_EXP_DEVCTL, val16); /* Clear error status */ - pci_write_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_DEVSTA, + pcie_capability_write_word(tp->pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_CED | PCI_EXP_DEVSTA_NFED | PCI_EXP_DEVSTA_FED | @@ -8269,9 +8307,10 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr, nic_addr); } -static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) + +static void tg3_coal_tx_init(struct tg3 *tp, struct ethtool_coalesce *ec) { - int i; + int i = 0; if (!tg3_flag(tp, ENABLE_TSS)) { tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs); @@ -8281,31 +8320,43 @@ static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) tw32(HOSTCC_TXCOL_TICKS, 0); tw32(HOSTCC_TXMAX_FRAMES, 0); tw32(HOSTCC_TXCOAL_MAXF_INT, 0); + + for (; i < tp->txq_cnt; i++) { + u32 reg; + + reg = HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18; + tw32(reg, ec->tx_coalesce_usecs); + reg = HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18; + tw32(reg, ec->tx_max_coalesced_frames); + reg = HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18; + tw32(reg, ec->tx_max_coalesced_frames_irq); + } } + for (; i < tp->irq_max - 1; i++) { + tw32(HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18, 0); + tw32(HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18, 0); + tw32(HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18, 0); + } +} + +static void tg3_coal_rx_init(struct tg3 *tp, struct ethtool_coalesce *ec) +{ + int i = 0; + u32 limit = tp->rxq_cnt; + if (!tg3_flag(tp, ENABLE_RSS)) { tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs); tw32(HOSTCC_RXMAX_FRAMES, ec->rx_max_coalesced_frames); tw32(HOSTCC_RXCOAL_MAXF_INT, ec->rx_max_coalesced_frames_irq); + limit--; } else { tw32(HOSTCC_RXCOL_TICKS, 0); tw32(HOSTCC_RXMAX_FRAMES, 0); tw32(HOSTCC_RXCOAL_MAXF_INT, 0); } - if (!tg3_flag(tp, 5705_PLUS)) { - u32 val = ec->stats_block_coalesce_usecs; - - tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq); - tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq); - - if (!netif_carrier_ok(tp->dev)) - val = 0; - - tw32(HOSTCC_STAT_COAL_TICKS, val); - } - - for (i = 0; i < tp->irq_cnt - 1; i++) { + for (; i < limit; i++) { u32 reg; reg = HOSTCC_RXCOL_TICKS_VEC1 + i * 0x18; @@ -8314,27 +8365,30 @@ static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) tw32(reg, ec->rx_max_coalesced_frames); reg = HOSTCC_RXCOAL_MAXF_INT_VEC1 + i * 0x18; tw32(reg, ec->rx_max_coalesced_frames_irq); - - if (tg3_flag(tp, ENABLE_TSS)) { - reg = HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18; - tw32(reg, ec->tx_coalesce_usecs); - reg = HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18; - tw32(reg, ec->tx_max_coalesced_frames); - reg = HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18; - tw32(reg, ec->tx_max_coalesced_frames_irq); - } } for (; i < tp->irq_max - 1; i++) { tw32(HOSTCC_RXCOL_TICKS_VEC1 + i * 0x18, 0); tw32(HOSTCC_RXMAX_FRAMES_VEC1 + i * 0x18, 0); tw32(HOSTCC_RXCOAL_MAXF_INT_VEC1 + i * 0x18, 0); + } +} - if (tg3_flag(tp, ENABLE_TSS)) { - tw32(HOSTCC_TXCOL_TICKS_VEC1 + i * 0x18, 0); - tw32(HOSTCC_TXMAX_FRAMES_VEC1 + i * 0x18, 0); - tw32(HOSTCC_TXCOAL_MAXF_INT_VEC1 + i * 0x18, 0); - } +static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) +{ + tg3_coal_tx_init(tp, ec); + tg3_coal_rx_init(tp, ec); + + if (!tg3_flag(tp, 5705_PLUS)) { + u32 val = ec->stats_block_coalesce_usecs; + + tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq); + tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq); + + if (!netif_carrier_ok(tp->dev)) + val = 0; + + tw32(HOSTCC_STAT_COAL_TICKS, val); } } @@ -8592,13 +8646,12 @@ static void __tg3_set_rx_mode(struct net_device *dev) } } -static void tg3_rss_init_dflt_indir_tbl(struct tg3 *tp) +static void tg3_rss_init_dflt_indir_tbl(struct tg3 *tp, u32 qcnt) { int i; for (i = 0; i < TG3_RSS_INDIR_TBL_SIZE; i++) - tp->rss_ind_tbl[i] = - ethtool_rxfh_indir_default(i, tp->irq_cnt - 1); + tp->rss_ind_tbl[i] = ethtool_rxfh_indir_default(i, qcnt); } static void tg3_rss_check_indir_tbl(struct tg3 *tp) @@ -8620,7 +8673,7 @@ static void tg3_rss_check_indir_tbl(struct tg3 *tp) } if (i != TG3_RSS_INDIR_TBL_SIZE) - tg3_rss_init_dflt_indir_tbl(tp); + tg3_rss_init_dflt_indir_tbl(tp, tp->rxq_cnt); } static void tg3_rss_write_indir_tbl(struct tg3 *tp) @@ -9517,7 +9570,6 @@ static int tg3_init_hw(struct tg3 *tp, int reset_phy) return tg3_reset_hw(tp, reset_phy); } -#if IS_ENABLED(CONFIG_HWMON) static void tg3_sd_scan_scratchpad(struct tg3 *tp, struct tg3_ocir *ocir) { int i; @@ -9570,22 +9622,17 @@ static const struct attribute_group tg3_group = { .attrs = tg3_attributes, }; -#endif - static void tg3_hwmon_close(struct tg3 *tp) { -#if IS_ENABLED(CONFIG_HWMON) if (tp->hwmon_dev) { hwmon_device_unregister(tp->hwmon_dev); tp->hwmon_dev = NULL; sysfs_remove_group(&tp->pdev->dev.kobj, &tg3_group); } -#endif } static void tg3_hwmon_open(struct tg3 *tp) { -#if IS_ENABLED(CONFIG_HWMON) int i, err; u32 size = 0; struct pci_dev *pdev = tp->pdev; @@ -9617,7 +9664,6 @@ static void tg3_hwmon_open(struct tg3 *tp) dev_err(&pdev->dev, "Cannot register hwmon device, aborting\n"); sysfs_remove_group(&pdev->dev.kobj, &tg3_group); } -#endif } @@ -10141,21 +10187,43 @@ static int tg3_request_firmware(struct tg3 *tp) return 0; } -static bool tg3_enable_msix(struct tg3 *tp) +static u32 tg3_irq_count(struct tg3 *tp) { - int i, rc; - struct msix_entry msix_ent[tp->irq_max]; + u32 irq_cnt = max(tp->rxq_cnt, tp->txq_cnt); - tp->irq_cnt = netif_get_num_default_rss_queues(); - if (tp->irq_cnt > 1) { + if (irq_cnt > 1) { /* We want as many rx rings enabled as there are cpus. * In multiqueue MSI-X mode, the first MSI-X vector * only deals with link interrupts, etc, so we add * one to the number of vectors we are requesting. */ - tp->irq_cnt = min_t(unsigned, tp->irq_cnt + 1, tp->irq_max); + irq_cnt = min_t(unsigned, irq_cnt + 1, tp->irq_max); } + return irq_cnt; +} + +static bool tg3_enable_msix(struct tg3 *tp) +{ + int i, rc; + struct msix_entry msix_ent[tp->irq_max]; + + tp->txq_cnt = tp->txq_req; + tp->rxq_cnt = tp->rxq_req; + if (!tp->rxq_cnt) + tp->rxq_cnt = netif_get_num_default_rss_queues(); + if (tp->rxq_cnt > tp->rxq_max) + tp->rxq_cnt = tp->rxq_max; + + /* Disable multiple TX rings by default. Simple round-robin hardware + * scheduling of the TX rings can cause starvation of rings with + * small packets when other rings have TSO or jumbo packets. + */ + if (!tp->txq_req) + tp->txq_cnt = 1; + + tp->irq_cnt = tg3_irq_count(tp); + for (i = 0; i < tp->irq_max; i++) { msix_ent[i].entry = i; msix_ent[i].vector = 0; @@ -10170,27 +10238,28 @@ static bool tg3_enable_msix(struct tg3 *tp) netdev_notice(tp->dev, "Requested %d MSI-X vectors, received %d\n", tp->irq_cnt, rc); tp->irq_cnt = rc; + tp->rxq_cnt = max(rc - 1, 1); + if (tp->txq_cnt) + tp->txq_cnt = min(tp->rxq_cnt, tp->txq_max); } for (i = 0; i < tp->irq_max; i++) tp->napi[i].irq_vec = msix_ent[i].vector; - netif_set_real_num_tx_queues(tp->dev, 1); - rc = tp->irq_cnt > 1 ? tp->irq_cnt - 1 : 1; - if (netif_set_real_num_rx_queues(tp->dev, rc)) { + if (netif_set_real_num_rx_queues(tp->dev, tp->rxq_cnt)) { pci_disable_msix(tp->pdev); return false; } - if (tp->irq_cnt > 1) { - tg3_flag_set(tp, ENABLE_RSS); + if (tp->irq_cnt == 1) + return true; - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) { - tg3_flag_set(tp, ENABLE_TSS); - netif_set_real_num_tx_queues(tp->dev, tp->irq_cnt - 1); - } - } + tg3_flag_set(tp, ENABLE_RSS); + + if (tp->txq_cnt > 1) + tg3_flag_set(tp, ENABLE_TSS); + + netif_set_real_num_tx_queues(tp->dev, tp->txq_cnt); return true; } @@ -10224,6 +10293,11 @@ defcfg: if (!tg3_flag(tp, USING_MSIX)) { tp->irq_cnt = 1; tp->napi[0].irq_vec = tp->pdev->irq; + } + + if (tp->irq_cnt == 1) { + tp->txq_cnt = 1; + tp->rxq_cnt = 1; netif_set_real_num_tx_queues(tp->dev, 1); netif_set_real_num_rx_queues(tp->dev, 1); } @@ -10241,38 +10315,11 @@ static void tg3_ints_fini(struct tg3 *tp) tg3_flag_clear(tp, ENABLE_TSS); } -static int tg3_open(struct net_device *dev) +static int tg3_start(struct tg3 *tp, bool reset_phy, bool test_irq) { - struct tg3 *tp = netdev_priv(dev); + struct net_device *dev = tp->dev; int i, err; - if (tp->fw_needed) { - err = tg3_request_firmware(tp); - if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) { - if (err) - return err; - } else if (err) { - netdev_warn(tp->dev, "TSO capability disabled\n"); - tg3_flag_clear(tp, TSO_CAPABLE); - } else if (!tg3_flag(tp, TSO_CAPABLE)) { - netdev_notice(tp->dev, "TSO capability restored\n"); - tg3_flag_set(tp, TSO_CAPABLE); - } - } - - netif_carrier_off(tp->dev); - - err = tg3_power_up(tp); - if (err) - return err; - - tg3_full_lock(tp, 0); - - tg3_disable_ints(tp); - tg3_flag_clear(tp, INIT_COMPLETE); - - tg3_full_unlock(tp); - /* * Setup interrupts first so we know how * many NAPI resources to allocate @@ -10306,7 +10353,7 @@ static int tg3_open(struct net_device *dev) tg3_full_lock(tp, 0); - err = tg3_init_hw(tp, 1); + err = tg3_init_hw(tp, reset_phy); if (err) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); tg3_free_rings(tp); @@ -10317,7 +10364,7 @@ static int tg3_open(struct net_device *dev) if (err) goto err_out3; - if (tg3_flag(tp, USING_MSI)) { + if (test_irq && tg3_flag(tp, USING_MSI)) { err = tg3_test_msi(tp); if (err) { @@ -10373,20 +10420,18 @@ err_out2: err_out1: tg3_ints_fini(tp); - tg3_frob_aux_power(tp, false); - pci_set_power_state(tp->pdev, PCI_D3hot); + return err; } -static int tg3_close(struct net_device *dev) +static void tg3_stop(struct tg3 *tp) { int i; - struct tg3 *tp = netdev_priv(dev); tg3_napi_disable(tp); tg3_reset_task_cancel(tp); - netif_tx_stop_all_queues(dev); + netif_tx_disable(tp->dev); tg3_timer_stop(tp); @@ -10411,13 +10456,60 @@ static int tg3_close(struct net_device *dev) tg3_ints_fini(tp); - /* Clear stats across close / open calls */ - memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev)); - memset(&tp->estats_prev, 0, sizeof(tp->estats_prev)); - tg3_napi_fini(tp); tg3_free_consistent(tp); +} + +static int tg3_open(struct net_device *dev) +{ + struct tg3 *tp = netdev_priv(dev); + int err; + + if (tp->fw_needed) { + err = tg3_request_firmware(tp); + if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0) { + if (err) + return err; + } else if (err) { + netdev_warn(tp->dev, "TSO capability disabled\n"); + tg3_flag_clear(tp, TSO_CAPABLE); + } else if (!tg3_flag(tp, TSO_CAPABLE)) { + netdev_notice(tp->dev, "TSO capability restored\n"); + tg3_flag_set(tp, TSO_CAPABLE); + } + } + + netif_carrier_off(tp->dev); + + err = tg3_power_up(tp); + if (err) + return err; + + tg3_full_lock(tp, 0); + + tg3_disable_ints(tp); + tg3_flag_clear(tp, INIT_COMPLETE); + + tg3_full_unlock(tp); + + err = tg3_start(tp, true, true); + if (err) { + tg3_frob_aux_power(tp, false); + pci_set_power_state(tp->pdev, PCI_D3hot); + } + return err; +} + +static int tg3_close(struct net_device *dev) +{ + struct tg3 *tp = netdev_priv(dev); + + tg3_stop(tp); + + /* Clear stats across close / open calls */ + memset(&tp->net_stats_prev, 0, sizeof(tp->net_stats_prev)); + memset(&tp->estats_prev, 0, sizeof(tp->estats_prev)); tg3_power_down(tp); @@ -11207,11 +11299,11 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: if (netif_running(tp->dev)) - info->data = tp->irq_cnt; + info->data = tp->rxq_cnt; else { info->data = num_online_cpus(); - if (info->data > TG3_IRQ_MAX_VECS_RSS) - info->data = TG3_IRQ_MAX_VECS_RSS; + if (info->data > TG3_RSS_MAX_NUM_QS) + info->data = TG3_RSS_MAX_NUM_QS; } /* The first interrupt vector only @@ -11268,6 +11360,58 @@ static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir) return 0; } +static void tg3_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct tg3 *tp = netdev_priv(dev); + u32 deflt_qs = netif_get_num_default_rss_queues(); + + channel->max_rx = tp->rxq_max; + channel->max_tx = tp->txq_max; + + if (netif_running(dev)) { + channel->rx_count = tp->rxq_cnt; + channel->tx_count = tp->txq_cnt; + } else { + if (tp->rxq_req) + channel->rx_count = tp->rxq_req; + else + channel->rx_count = min(deflt_qs, tp->rxq_max); + + if (tp->txq_req) + channel->tx_count = tp->txq_req; + else + channel->tx_count = min(deflt_qs, tp->txq_max); + } +} + +static int tg3_set_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct tg3 *tp = netdev_priv(dev); + + if (!tg3_flag(tp, SUPPORT_MSIX)) + return -EOPNOTSUPP; + + if (channel->rx_count > tp->rxq_max || + channel->tx_count > tp->txq_max) + return -EINVAL; + + tp->rxq_req = channel->rx_count; + tp->txq_req = channel->tx_count; + + if (!netif_running(dev)) + return 0; + + tg3_stop(tp); + + netif_carrier_off(dev); + + tg3_start(tp, true, false); + + return 0; +} + static void tg3_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { switch (stringset) { @@ -12516,6 +12660,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_rxfh_indir_size = tg3_get_rxfh_indir_size, .get_rxfh_indir = tg3_get_rxfh_indir, .set_rxfh_indir = tg3_set_rxfh_indir, + .get_channels = tg3_get_channels, + .set_channels = tg3_set_channels, .get_ts_info = ethtool_op_get_ts_info, }; @@ -14532,10 +14678,20 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tg3_flag(tp, 57765_PLUS)) { tg3_flag_set(tp, SUPPORT_MSIX); tp->irq_max = TG3_IRQ_MAX_VECS; - tg3_rss_init_dflt_indir_tbl(tp); } } + tp->txq_max = 1; + tp->rxq_max = 1; + if (tp->irq_max > 1) { + tp->rxq_max = TG3_RSS_MAX_NUM_QS; + tg3_rss_init_dflt_indir_tbl(tp, TG3_RSS_MAX_NUM_QS); + + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5720) + tp->txq_max = tp->irq_max - 1; + } + if (tg3_flag(tp, 5755_PLUS) || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tg3_flag_set(tp, SHORT_DMA_BUG); @@ -14565,9 +14721,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tg3_flag_set(tp, PCI_EXPRESS); - pci_read_config_word(tp->pdev, - pci_pcie_cap(tp->pdev) + PCI_EXP_LNKCTL, - &lnkctl); + pcie_capability_read_word(tp->pdev, PCI_EXP_LNKCTL, &lnkctl); if (lnkctl & PCI_EXP_LNKCTL_CLKREQ_EN) { if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { @@ -16397,7 +16551,7 @@ done: rtnl_unlock(); } -static struct pci_error_handlers tg3_err_handler = { +static const struct pci_error_handlers tg3_err_handler = { .error_detected = tg3_io_error_detected, .slot_reset = tg3_io_slot_reset, .resume = tg3_io_resume diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index 6d52cb28682..d9308c32102 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2860,7 +2860,8 @@ struct tg3_rx_prodring_set { dma_addr_t rx_jmb_mapping; }; -#define TG3_IRQ_MAX_VECS_RSS 5 +#define TG3_RSS_MAX_NUM_QS 4 +#define TG3_IRQ_MAX_VECS_RSS (TG3_RSS_MAX_NUM_QS + 1) #define TG3_IRQ_MAX_VECS TG3_IRQ_MAX_VECS_RSS struct tg3_napi { @@ -3037,6 +3038,9 @@ struct tg3 { void (*write32_tx_mbox) (struct tg3 *, u32, u32); u32 dma_limit; + u32 txq_req; + u32 txq_cnt; + u32 txq_max; /* begin "rx thread" cacheline section */ struct tg3_napi napi[TG3_IRQ_MAX_VECS]; @@ -3051,6 +3055,9 @@ struct tg3 { u32 rx_std_max_post; u32 rx_offset; u32 rx_pkt_map_sz; + u32 rxq_req; + u32 rxq_cnt; + u32 rxq_max; bool rx_refill; diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index b441f33258e..ce1eac52947 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -3268,6 +3268,7 @@ bnad_pci_probe(struct pci_dev *pdev, * Output : using_dac = 1 for 64 bit DMA * = 0 for 32 bit DMA */ + using_dac = false; err = bnad_pci_init(bnad, pdev, &using_dac); if (err) goto unlock_mutex; diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 6505070abcf..9c9f3260344 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1394,7 +1394,7 @@ static int offload_close(struct t3cdev *tdev) sysfs_remove_group(&tdev->lldev->dev.kobj, &offload_attr_group); /* Flush work scheduled while releasing TIDs */ - flush_work_sync(&td->tid_release_task); + flush_work(&td->tid_release_task); tdev->lldev = NULL; cxgb3_set_dummy_ops(tdev); @@ -3036,7 +3036,7 @@ static void t3_io_resume(struct pci_dev *pdev) t3_resume_ports(adapter); } -static struct pci_error_handlers t3_err_handler = { +static const struct pci_error_handlers t3_err_handler = { .error_detected = t3_io_error_detected, .slot_reset = t3_io_slot_reset, .resume = t3_io_resume, diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index bff8a3cdd3d..aef45d3113b 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -3289,22 +3289,18 @@ static void config_pcie(struct adapter *adap) unsigned int log2_width, pldsize; unsigned int fst_trn_rx, fst_trn_tx, acklat, rpllmt; - pci_read_config_word(adap->pdev, - adap->pdev->pcie_cap + PCI_EXP_DEVCTL, - &val); + pcie_capability_read_word(adap->pdev, PCI_EXP_DEVCTL, &val); pldsize = (val & PCI_EXP_DEVCTL_PAYLOAD) >> 5; pci_read_config_word(adap->pdev, 0x2, &devid); if (devid == 0x37) { - pci_write_config_word(adap->pdev, - adap->pdev->pcie_cap + PCI_EXP_DEVCTL, - val & ~PCI_EXP_DEVCTL_READRQ & - ~PCI_EXP_DEVCTL_PAYLOAD); + pcie_capability_write_word(adap->pdev, PCI_EXP_DEVCTL, + val & ~PCI_EXP_DEVCTL_READRQ & + ~PCI_EXP_DEVCTL_PAYLOAD); pldsize = 0; } - pci_read_config_word(adap->pdev, adap->pdev->pcie_cap + PCI_EXP_LNKCTL, - &val); + pcie_capability_read_word(adap->pdev, PCI_EXP_LNKCTL, &val); fst_trn_tx = G_NUMFSTTRNSEQ(t3_read_reg(adap, A_PCIE_PEX_CTRL0)); fst_trn_rx = adap->params.rev == 0 ? fst_trn_tx : @@ -3425,15 +3421,13 @@ out_err: static void get_pci_mode(struct adapter *adapter, struct pci_params *p) { static unsigned short speed_map[] = { 33, 66, 100, 133 }; - u32 pci_mode, pcie_cap; + u32 pci_mode; - pcie_cap = pci_pcie_cap(adapter->pdev); - if (pcie_cap) { + if (pci_is_pcie(adapter->pdev)) { u16 val; p->variant = PCI_VARIANT_PCIE; - pci_read_config_word(adapter->pdev, pcie_cap + PCI_EXP_LNKSTA, - &val); + pcie_capability_read_word(adapter->pdev, PCI_EXP_LNKSTA, &val); p->width = (val >> 4) & 0x3f; return; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index ec2dafe8ae5..745a1f53361 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -67,12 +67,12 @@ enum { }; enum { - MEMWIN0_APERTURE = 65536, - MEMWIN0_BASE = 0x30000, + MEMWIN0_APERTURE = 2048, + MEMWIN0_BASE = 0x1b800, MEMWIN1_APERTURE = 32768, MEMWIN1_BASE = 0x28000, - MEMWIN2_APERTURE = 2048, - MEMWIN2_BASE = 0x1b800, + MEMWIN2_APERTURE = 65536, + MEMWIN2_BASE = 0x30000, }; enum dev_master { @@ -211,6 +211,9 @@ struct tp_err_stats { struct tp_params { unsigned int ntxchan; /* # of Tx channels */ unsigned int tre; /* log2 of core clocks per TP tick */ + + uint32_t dack_re; /* DACK timer resolution */ + unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ }; struct vpd_params { @@ -315,6 +318,10 @@ enum { /* adapter flags */ USING_MSI = (1 << 1), USING_MSIX = (1 << 2), FW_OK = (1 << 4), + RSS_TNLALLLOOKUP = (1 << 5), + USING_SOFT_PARAMS = (1 << 6), + MASTER_PF = (1 << 7), + FW_OFLD_CONN = (1 << 9), }; struct rx_sw_desc; @@ -467,6 +474,11 @@ struct sge { u16 rdma_rxq[NCHAN]; u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; + u32 fl_pg_order; /* large page allocation size */ + u32 stat_len; /* length of status page at ring end */ + u32 pktshift; /* padding between CPL & packet data */ + u32 fl_align; /* response queue message alignment */ + u32 fl_starve_thres; /* Free List starvation threshold */ unsigned int starve_thres; u8 idma_state[2]; unsigned int egr_start; @@ -511,6 +523,8 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + unsigned int l2t_start; + unsigned int l2t_end; struct l2t_data *l2t; void *uld_handle[CXGB4_ULD_MAX]; struct list_head list_node; @@ -619,7 +633,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); -void t4_sge_init(struct adapter *adap); +int t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); extern int dbfifo_int_thresh; @@ -638,6 +652,14 @@ static inline unsigned int us_to_core_ticks(const struct adapter *adap, return (us * adap->params.vpd.cclk) / 1000; } +static inline unsigned int core_ticks_to_us(const struct adapter *adapter, + unsigned int ticks) +{ + /* add Core Clock / 2 to round ticks to nearest uS */ + return ((ticks * 1000 + adapter->params.vpd.cclk/2) / + adapter->params.vpd.cclk); +} + void t4_set_reg_field(struct adapter *adap, unsigned int addr, u32 mask, u32 val); @@ -656,6 +678,9 @@ static inline int t4_wr_mbox_ns(struct adapter *adap, int mbox, const void *cmd, return t4_wr_mbox_meat(adap, mbox, cmd, size, rpl, false); } +void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, const u32 *vals, + unsigned int nregs, unsigned int start_idx); void t4_intr_enable(struct adapter *adapter); void t4_intr_disable(struct adapter *adapter); int t4_slow_intr_handler(struct adapter *adapter); @@ -664,8 +689,12 @@ int t4_wait_dev_ready(struct adapter *adap); int t4_link_start(struct adapter *adap, unsigned int mbox, unsigned int port, struct link_config *lc); int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); +int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, + __be32 *buf); int t4_seeprom_wp(struct adapter *adapter, bool enable); +int get_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); +unsigned int t4_flash_cfg_addr(struct adapter *adapter); int t4_check_fw_version(struct adapter *adapter); int t4_prep_adapter(struct adapter *adapter); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); @@ -680,6 +709,8 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, void t4_get_port_stats(struct adapter *adap, int idx, struct port_stats *p); void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log); +void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, + unsigned int mask, unsigned int val); void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4, struct tp_tcp_stats *v6); void t4_load_mtus(struct adapter *adap, const unsigned short *mtus, @@ -695,6 +726,16 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, int t4_fw_bye(struct adapter *adap, unsigned int mbox); int t4_early_init(struct adapter *adap, unsigned int mbox); int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset); +int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force); +int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset); +int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, + const u8 *fw_data, unsigned int size, int force); +int t4_fw_config_file(struct adapter *adap, unsigned int mbox, + unsigned int mtype, unsigned int maddr, + u32 *finiver, u32 *finicsum, u32 *cfcsum); +int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, + unsigned int cache_line_size); +int t4_fw_initialize(struct adapter *adap, unsigned int mbox); int t4_query_params(struct adapter *adap, unsigned int mbox, unsigned int pf, unsigned int vf, unsigned int nparams, const u32 *params, u32 *val); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 5ed49af23d6..6b9f6bb2f7e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -78,28 +78,45 @@ */ #define MAX_SGE_TIMERVAL 200U -#ifdef CONFIG_PCI_IOV -/* - * Virtual Function provisioning constants. We need two extra Ingress Queues - * with Interrupt capability to serve as the VF's Firmware Event Queue and - * Forwarded Interrupt Queue (when using MSI mode) -- neither will have Free - * Lists associated with them). For each Ethernet/Control Egress Queue and - * for each Free List, we need an Egress Context. - */ enum { + /* + * Physical Function provisioning constants. + */ + PFRES_NVI = 4, /* # of Virtual Interfaces */ + PFRES_NETHCTRL = 128, /* # of EQs used for ETH or CTRL Qs */ + PFRES_NIQFLINT = 128, /* # of ingress Qs/w Free List(s)/intr + */ + PFRES_NEQ = 256, /* # of egress queues */ + PFRES_NIQ = 0, /* # of ingress queues */ + PFRES_TC = 0, /* PCI-E traffic class */ + PFRES_NEXACTF = 128, /* # of exact MPS filters */ + + PFRES_R_CAPS = FW_CMD_CAP_PF, + PFRES_WX_CAPS = FW_CMD_CAP_PF, + +#ifdef CONFIG_PCI_IOV + /* + * Virtual Function provisioning constants. We need two extra Ingress + * Queues with Interrupt capability to serve as the VF's Firmware + * Event Queue and Forwarded Interrupt Queue (when using MSI mode) -- + * neither will have Free Lists associated with them). For each + * Ethernet/Control Egress Queue and for each Free List, we need an + * Egress Context. + */ VFRES_NPORTS = 1, /* # of "ports" per VF */ VFRES_NQSETS = 2, /* # of "Queue Sets" per VF */ VFRES_NVI = VFRES_NPORTS, /* # of Virtual Interfaces */ VFRES_NETHCTRL = VFRES_NQSETS, /* # of EQs used for ETH or CTRL Qs */ VFRES_NIQFLINT = VFRES_NQSETS+2,/* # of ingress Qs/w Free List(s)/intr */ - VFRES_NIQ = 0, /* # of non-fl/int ingress queues */ VFRES_NEQ = VFRES_NQSETS*2, /* # of egress queues */ + VFRES_NIQ = 0, /* # of non-fl/int ingress queues */ VFRES_TC = 0, /* PCI-E traffic class */ VFRES_NEXACTF = 16, /* # of exact MPS filters */ VFRES_R_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF|FW_CMD_CAP_PORT, VFRES_WX_CAPS = FW_CMD_CAP_DMAQ|FW_CMD_CAP_VF, +#endif }; /* @@ -146,7 +163,6 @@ static unsigned int pfvfres_pmask(struct adapter *adapter, } /*NOTREACHED*/ } -#endif enum { MAX_TXQ_ENTRIES = 16384, @@ -193,6 +209,7 @@ static DEFINE_PCI_DEVICE_TABLE(cxgb4_pci_tbl) = { }; #define FW_FNAME "cxgb4/t4fw.bin" +#define FW_CFNAME "cxgb4/t4-config.txt" MODULE_DESCRIPTION(DRV_DESC); MODULE_AUTHOR("Chelsio Communications"); @@ -201,6 +218,28 @@ MODULE_VERSION(DRV_VERSION); MODULE_DEVICE_TABLE(pci, cxgb4_pci_tbl); MODULE_FIRMWARE(FW_FNAME); +/* + * Normally we're willing to become the firmware's Master PF but will be happy + * if another PF has already become the Master and initialized the adapter. + * Setting "force_init" will cause this driver to forcibly establish itself as + * the Master PF and initialize the adapter. + */ +static uint force_init; + +module_param(force_init, uint, 0644); +MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter"); + +/* + * Normally if the firmware we connect to has Configuration File support, we + * use that and only fall back to the old Driver-based initialization if the + * Configuration File fails for some reason. If force_old_init is set, then + * we'll always use the old Driver-based initialization sequence. + */ +static uint force_old_init; + +module_param(force_old_init, uint, 0644); +MODULE_PARM_DESC(force_old_init, "Force old initialization sequence"); + static int dflt_msg_enable = DFLT_MSG_ENABLE; module_param(dflt_msg_enable, int, 0644); @@ -236,6 +275,20 @@ module_param_array(intr_cnt, uint, NULL, 0644); MODULE_PARM_DESC(intr_cnt, "thresholds 1..3 for queue interrupt packet counters"); +/* + * Normally we tell the chip to deliver Ingress Packets into our DMA buffers + * offset by 2 bytes in order to have the IP headers line up on 4-byte + * boundaries. This is a requirement for many architectures which will throw + * a machine check fault if an attempt is made to access one of the 4-byte IP + * header fields on a non-4-byte boundary. And it's a major performance issue + * even on some architectures which allow it like some implementations of the + * x86 ISA. However, some architectures don't mind this and for some very + * edge-case performance sensitive applications (like forwarding large volumes + * of small packets), setting this DMA offset to 0 will decrease the number of + * PCI-E Bus transfers enough to measurably affect performance. + */ +static int rx_dma_offset = 2; + static bool vf_acls; #ifdef CONFIG_PCI_IOV @@ -248,6 +301,30 @@ module_param_array(num_vf, uint, NULL, 0644); MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3"); #endif +/* + * The filter TCAM has a fixed portion and a variable portion. The fixed + * portion can match on source/destination IP IPv4/IPv6 addresses and TCP/UDP + * ports. The variable portion is 36 bits which can include things like Exact + * Match MAC Index (9 bits), Ether Type (16 bits), IP Protocol (8 bits), + * [Inner] VLAN Tag (17 bits), etc. which, if all were somehow selected, would + * far exceed the 36-bit budget for this "compressed" header portion of the + * filter. Thus, we have a scarce resource which must be carefully managed. + * + * By default we set this up to mostly match the set of filter matching + * capabilities of T3 but with accommodations for some of T4's more + * interesting features: + * + * { IP Fragment (1), MPS Match Type (3), IP Protocol (8), + * [Inner] VLAN (17), Port (3), FCoE (1) } + */ +enum { + TP_VLAN_PRI_MAP_DEFAULT = HW_TPL_FR_MT_PR_IV_P_FC, + TP_VLAN_PRI_MAP_FIRST = FCOE_SHIFT, + TP_VLAN_PRI_MAP_LAST = FRAGMENTATION_SHIFT, +}; + +static unsigned int tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; + static struct dentry *cxgb4_debugfs_root; static LIST_HEAD(adapter_list); @@ -852,11 +929,25 @@ static int upgrade_fw(struct adapter *adap) */ if (FW_HDR_FW_VER_MAJOR_GET(adap->params.fw_vers) != FW_VERSION_MAJOR || vers > adap->params.fw_vers) { - ret = -t4_load_fw(adap, fw->data, fw->size); + dev_info(dev, "upgrading firmware ...\n"); + ret = t4_fw_upgrade(adap, adap->mbox, fw->data, fw->size, + /*force=*/false); if (!ret) - dev_info(dev, "firmware upgraded to version %pI4 from " - FW_FNAME "\n", &hdr->fw_ver); + dev_info(dev, "firmware successfully upgraded to " + FW_FNAME " (%d.%d.%d.%d)\n", + FW_HDR_FW_VER_MAJOR_GET(vers), + FW_HDR_FW_VER_MINOR_GET(vers), + FW_HDR_FW_VER_MICRO_GET(vers), + FW_HDR_FW_VER_BUILD_GET(vers)); + else + dev_err(dev, "firmware upgrade failed! err=%d\n", -ret); + } else { + /* + * Tell our caller that we didn't upgrade the firmware. + */ + ret = -EINVAL; } + out: release_firmware(fw); return ret; } @@ -2470,8 +2561,8 @@ int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx, else delta = size - hw_pidx + pidx; wmb(); - t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), - V_QID(qid) | V_PIDX(delta)); + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + QID(qid) | PIDX(delta)); } out: return ret; @@ -2579,8 +2670,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q) else delta = q->size - hw_pidx + q->db_pidx; wmb(); - t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), - V_QID(q->cntxt_id) | V_PIDX(delta)); + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + QID(q->cntxt_id) | PIDX(delta)); } out: q->db_disabled = 0; @@ -2617,9 +2708,9 @@ static void process_db_full(struct work_struct *work) notify_rdma_uld(adap, CXGB4_CONTROL_DB_FULL); drain_db_fifo(adap, dbfifo_drain_delay); - t4_set_reg_field(adap, A_SGE_INT_ENABLE3, - F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, - F_DBFIFO_HP_INT | F_DBFIFO_LP_INT); + t4_set_reg_field(adap, SGE_INT_ENABLE3, + DBFIFO_HP_INT | DBFIFO_LP_INT, + DBFIFO_HP_INT | DBFIFO_LP_INT); notify_rdma_uld(adap, CXGB4_CONTROL_DB_EMPTY); } @@ -2639,8 +2730,8 @@ static void process_db_drop(struct work_struct *work) void t4_db_full(struct adapter *adap) { - t4_set_reg_field(adap, A_SGE_INT_ENABLE3, - F_DBFIFO_HP_INT | F_DBFIFO_LP_INT, 0); + t4_set_reg_field(adap, SGE_INT_ENABLE3, + DBFIFO_HP_INT | DBFIFO_LP_INT, 0); queue_work(workq, &adap->db_full_task); } @@ -3076,6 +3167,10 @@ static void setup_memwin(struct adapter *adap) t4_write_reg(adap, PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN, 2), (bar0 + MEMWIN2_BASE) | BIR(0) | WINDOW(ilog2(MEMWIN2_APERTURE) - 10)); +} + +static void setup_memwin_rdma(struct adapter *adap) +{ if (adap->vres.ocq.size) { unsigned int start, sz_kb; @@ -3155,6 +3250,488 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c) /* * Phase 0 of initialization: contact FW, obtain config, perform basic init. + * + * If the firmware we're dealing with has Configuration File support, then + * we use that to perform all configuration + */ + +/* + * Tweak configuration based on module parameters, etc. Most of these have + * defaults assigned to them by Firmware Configuration Files (if we're using + * them) but need to be explicitly set if we're using hard-coded + * initialization. But even in the case of using Firmware Configuration + * Files, we'd like to expose the ability to change these via module + * parameters so these are essentially common tweaks/settings for + * Configuration Files and hard-coded initialization ... + */ +static int adap_init0_tweaks(struct adapter *adapter) +{ + /* + * Fix up various Host-Dependent Parameters like Page Size, Cache + * Line Size, etc. The firmware default is for a 4KB Page Size and + * 64B Cache Line Size ... + */ + t4_fixup_host_params(adapter, PAGE_SIZE, L1_CACHE_BYTES); + + /* + * Process module parameters which affect early initialization. + */ + if (rx_dma_offset != 2 && rx_dma_offset != 0) { + dev_err(&adapter->pdev->dev, + "Ignoring illegal rx_dma_offset=%d, using 2\n", + rx_dma_offset); + rx_dma_offset = 2; + } + t4_set_reg_field(adapter, SGE_CONTROL, + PKTSHIFT_MASK, + PKTSHIFT(rx_dma_offset)); + + /* + * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux + * adds the pseudo header itself. + */ + t4_tp_wr_bits_indirect(adapter, TP_INGRESS_CONFIG, + CSUM_HAS_PSEUDO_HDR, 0); + + return 0; +} + +/* + * Attempt to initialize the adapter via a Firmware Configuration File. + */ +static int adap_init0_config(struct adapter *adapter, int reset) +{ + struct fw_caps_config_cmd caps_cmd; + const struct firmware *cf; + unsigned long mtype = 0, maddr = 0; + u32 finiver, finicsum, cfcsum; + int ret, using_flash; + + /* + * Reset device if necessary. + */ + if (reset) { + ret = t4_fw_reset(adapter, adapter->mbox, + PIORSTMODE | PIORST); + if (ret < 0) + goto bye; + } + + /* + * If we have a T4 configuration file under /lib/firmware/cxgb4/, + * then use that. Otherwise, use the configuration file stored + * in the adapter flash ... + */ + ret = request_firmware(&cf, FW_CFNAME, adapter->pdev_dev); + if (ret < 0) { + using_flash = 1; + mtype = FW_MEMTYPE_CF_FLASH; + maddr = t4_flash_cfg_addr(adapter); + } else { + u32 params[7], val[7]; + + using_flash = 0; + if (cf->size >= FLASH_CFG_MAX_SIZE) + ret = -ENOMEM; + else { + params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF)); + ret = t4_query_params(adapter, adapter->mbox, + adapter->fn, 0, 1, params, val); + if (ret == 0) { + /* + * For t4_memory_write() below addresses and + * sizes have to be in terms of multiples of 4 + * bytes. So, if the Configuration File isn't + * a multiple of 4 bytes in length we'll have + * to write that out separately since we can't + * guarantee that the bytes following the + * residual byte in the buffer returned by + * request_firmware() are zeroed out ... + */ + size_t resid = cf->size & 0x3; + size_t size = cf->size & ~0x3; + __be32 *data = (__be32 *)cf->data; + + mtype = FW_PARAMS_PARAM_Y_GET(val[0]); + maddr = FW_PARAMS_PARAM_Z_GET(val[0]) << 16; + + ret = t4_memory_write(adapter, mtype, maddr, + size, data); + if (ret == 0 && resid != 0) { + union { + __be32 word; + char buf[4]; + } last; + int i; + + last.word = data[size >> 2]; + for (i = resid; i < 4; i++) + last.buf[i] = 0; + ret = t4_memory_write(adapter, mtype, + maddr + size, + 4, &last.word); + } + } + } + + release_firmware(cf); + if (ret) + goto bye; + } + + /* + * Issue a Capability Configuration command to the firmware to get it + * to parse the Configuration File. We don't use t4_fw_config_file() + * because we want the ability to modify various features after we've + * processed the configuration file ... + */ + memset(&caps_cmd, 0, sizeof(caps_cmd)); + caps_cmd.op_to_write = + htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | + FW_CMD_READ); + caps_cmd.retval_len16 = + htonl(FW_CAPS_CONFIG_CMD_CFVALID | + FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | + FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) | + FW_LEN16(caps_cmd)); + ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd), + &caps_cmd); + if (ret < 0) + goto bye; + + finiver = ntohl(caps_cmd.finiver); + finicsum = ntohl(caps_cmd.finicsum); + cfcsum = ntohl(caps_cmd.cfcsum); + if (finicsum != cfcsum) + dev_warn(adapter->pdev_dev, "Configuration File checksum "\ + "mismatch: [fini] csum=%#x, computed csum=%#x\n", + finicsum, cfcsum); + + /* + * If we're a pure NIC driver then disable all offloading facilities. + * This will allow the firmware to optimize aspects of the hardware + * configuration which will result in improved performance. + */ + caps_cmd.ofldcaps = 0; + caps_cmd.iscsicaps = 0; + caps_cmd.rdmacaps = 0; + caps_cmd.fcoecaps = 0; + + /* + * And now tell the firmware to use the configuration we just loaded. + */ + caps_cmd.op_to_write = + htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | + FW_CMD_WRITE); + caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd)); + ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd), + NULL); + if (ret < 0) + goto bye; + + /* + * Tweak configuration based on system architecture, module + * parameters, etc. + */ + ret = adap_init0_tweaks(adapter); + if (ret < 0) + goto bye; + + /* + * And finally tell the firmware to initialize itself using the + * parameters from the Configuration File. + */ + ret = t4_fw_initialize(adapter, adapter->mbox); + if (ret < 0) + goto bye; + + /* + * Return successfully and note that we're operating with parameters + * not supplied by the driver, rather than from hard-wired + * initialization constants burried in the driver. + */ + adapter->flags |= USING_SOFT_PARAMS; + dev_info(adapter->pdev_dev, "Successfully configured using Firmware "\ + "Configuration File %s, version %#x, computed checksum %#x\n", + (using_flash + ? "in device FLASH" + : "/lib/firmware/" FW_CFNAME), + finiver, cfcsum); + return 0; + + /* + * Something bad happened. Return the error ... (If the "error" + * is that there's no Configuration File on the adapter we don't + * want to issue a warning since this is fairly common.) + */ +bye: + if (ret != -ENOENT) + dev_warn(adapter->pdev_dev, "Configuration file error %d\n", + -ret); + return ret; +} + +/* + * Attempt to initialize the adapter via hard-coded, driver supplied + * parameters ... + */ +static int adap_init0_no_config(struct adapter *adapter, int reset) +{ + struct sge *s = &adapter->sge; + struct fw_caps_config_cmd caps_cmd; + u32 v; + int i, ret; + + /* + * Reset device if necessary + */ + if (reset) { + ret = t4_fw_reset(adapter, adapter->mbox, + PIORSTMODE | PIORST); + if (ret < 0) + goto bye; + } + + /* + * Get device capabilities and select which we'll be using. + */ + memset(&caps_cmd, 0, sizeof(caps_cmd)); + caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | FW_CMD_READ); + caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd)); + ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd), + &caps_cmd); + if (ret < 0) + goto bye; + +#ifndef CONFIG_CHELSIO_T4_OFFLOAD + /* + * If we're a pure NIC driver then disable all offloading facilities. + * This will allow the firmware to optimize aspects of the hardware + * configuration which will result in improved performance. + */ + caps_cmd.ofldcaps = 0; + caps_cmd.iscsicaps = 0; + caps_cmd.rdmacaps = 0; + caps_cmd.fcoecaps = 0; +#endif + + if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_VM)) { + if (!vf_acls) + caps_cmd.niccaps ^= htons(FW_CAPS_CONFIG_NIC_VM); + else + caps_cmd.niccaps = htons(FW_CAPS_CONFIG_NIC_VM); + } else if (vf_acls) { + dev_err(adapter->pdev_dev, "virtualization ACLs not supported"); + goto bye; + } + caps_cmd.op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | FW_CMD_WRITE); + ret = t4_wr_mbox(adapter, adapter->mbox, &caps_cmd, sizeof(caps_cmd), + NULL); + if (ret < 0) + goto bye; + + /* + * Tweak configuration based on system architecture, module + * parameters, etc. + */ + ret = adap_init0_tweaks(adapter); + if (ret < 0) + goto bye; + + /* + * Select RSS Global Mode we want to use. We use "Basic Virtual" + * mode which maps each Virtual Interface to its own section of + * the RSS Table and we turn on all map and hash enables ... + */ + adapter->flags |= RSS_TNLALLLOOKUP; + ret = t4_config_glbl_rss(adapter, adapter->mbox, + FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL, + FW_RSS_GLB_CONFIG_CMD_TNLMAPEN | + FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ | + ((adapter->flags & RSS_TNLALLLOOKUP) ? + FW_RSS_GLB_CONFIG_CMD_TNLALLLKP : 0)); + if (ret < 0) + goto bye; + + /* + * Set up our own fundamental resource provisioning ... + */ + ret = t4_cfg_pfvf(adapter, adapter->mbox, adapter->fn, 0, + PFRES_NEQ, PFRES_NETHCTRL, + PFRES_NIQFLINT, PFRES_NIQ, + PFRES_TC, PFRES_NVI, + FW_PFVF_CMD_CMASK_MASK, + pfvfres_pmask(adapter, adapter->fn, 0), + PFRES_NEXACTF, + PFRES_R_CAPS, PFRES_WX_CAPS); + if (ret < 0) + goto bye; + + /* + * Perform low level SGE initialization. We need to do this before we + * send the firmware the INITIALIZE command because that will cause + * any other PF Drivers which are waiting for the Master + * Initialization to proceed forward. + */ + for (i = 0; i < SGE_NTIMERS - 1; i++) + s->timer_val[i] = min(intr_holdoff[i], MAX_SGE_TIMERVAL); + s->timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL; + s->counter_val[0] = 1; + for (i = 1; i < SGE_NCOUNTERS; i++) + s->counter_val[i] = min(intr_cnt[i - 1], + THRESHOLD_0_GET(THRESHOLD_0_MASK)); + t4_sge_init(adapter); + +#ifdef CONFIG_PCI_IOV + /* + * Provision resource limits for Virtual Functions. We currently + * grant them all the same static resource limits except for the Port + * Access Rights Mask which we're assigning based on the PF. All of + * the static provisioning stuff for both the PF and VF really needs + * to be managed in a persistent manner for each device which the + * firmware controls. + */ + { + int pf, vf; + + for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) { + if (num_vf[pf] <= 0) + continue; + + /* VF numbering starts at 1! */ + for (vf = 1; vf <= num_vf[pf]; vf++) { + ret = t4_cfg_pfvf(adapter, adapter->mbox, + pf, vf, + VFRES_NEQ, VFRES_NETHCTRL, + VFRES_NIQFLINT, VFRES_NIQ, + VFRES_TC, VFRES_NVI, + FW_PFVF_CMD_CMASK_GET( + FW_PFVF_CMD_CMASK_MASK), + pfvfres_pmask( + adapter, pf, vf), + VFRES_NEXACTF, + VFRES_R_CAPS, VFRES_WX_CAPS); + if (ret < 0) + dev_warn(adapter->pdev_dev, + "failed to "\ + "provision pf/vf=%d/%d; " + "err=%d\n", pf, vf, ret); + } + } + } +#endif + + /* + * Set up the default filter mode. Later we'll want to implement this + * via a firmware command, etc. ... This needs to be done before the + * firmare initialization command ... If the selected set of fields + * isn't equal to the default value, we'll need to make sure that the + * field selections will fit in the 36-bit budget. + */ + if (tp_vlan_pri_map != TP_VLAN_PRI_MAP_DEFAULT) { + int i, bits = 0; + + for (i = TP_VLAN_PRI_MAP_FIRST; i <= TP_VLAN_PRI_MAP_LAST; i++) + switch (tp_vlan_pri_map & (1 << i)) { + case 0: + /* compressed filter field not enabled */ + break; + case FCOE_MASK: + bits += 1; + break; + case PORT_MASK: + bits += 3; + break; + case VNIC_ID_MASK: + bits += 17; + break; + case VLAN_MASK: + bits += 17; + break; + case TOS_MASK: + bits += 8; + break; + case PROTOCOL_MASK: + bits += 8; + break; + case ETHERTYPE_MASK: + bits += 16; + break; + case MACMATCH_MASK: + bits += 9; + break; + case MPSHITTYPE_MASK: + bits += 3; + break; + case FRAGMENTATION_MASK: + bits += 1; + break; + } + + if (bits > 36) { + dev_err(adapter->pdev_dev, + "tp_vlan_pri_map=%#x needs %d bits > 36;"\ + " using %#x\n", tp_vlan_pri_map, bits, + TP_VLAN_PRI_MAP_DEFAULT); + tp_vlan_pri_map = TP_VLAN_PRI_MAP_DEFAULT; + } + } + v = tp_vlan_pri_map; + t4_write_indirect(adapter, TP_PIO_ADDR, TP_PIO_DATA, + &v, 1, TP_VLAN_PRI_MAP); + + /* + * We need Five Tuple Lookup mode to be set in TP_GLOBAL_CONFIG order + * to support any of the compressed filter fields above. Newer + * versions of the firmware do this automatically but it doesn't hurt + * to set it here. Meanwhile, we do _not_ need to set Lookup Every + * Packet in TP_INGRESS_CONFIG to support matching non-TCP packets + * since the firmware automatically turns this on and off when we have + * a non-zero number of filters active (since it does have a + * performance impact). + */ + if (tp_vlan_pri_map) + t4_set_reg_field(adapter, TP_GLOBAL_CONFIG, + FIVETUPLELOOKUP_MASK, + FIVETUPLELOOKUP_MASK); + + /* + * Tweak some settings. + */ + t4_write_reg(adapter, TP_SHIFT_CNT, SYNSHIFTMAX(6) | + RXTSHIFTMAXR1(4) | RXTSHIFTMAXR2(15) | + PERSHIFTBACKOFFMAX(8) | PERSHIFTMAX(8) | + KEEPALIVEMAXR1(4) | KEEPALIVEMAXR2(9)); + + /* + * Get basic stuff going by issuing the Firmware Initialize command. + * Note that this _must_ be after all PFVF commands ... + */ + ret = t4_fw_initialize(adapter, adapter->mbox); + if (ret < 0) + goto bye; + + /* + * Return successfully! + */ + dev_info(adapter->pdev_dev, "Successfully configured using built-in "\ + "driver parameters\n"); + return 0; + + /* + * Something bad happened. Return the error ... + */ +bye: + return ret; +} + +/* + * Phase 0 of initialization: contact FW, obtain config, perform basic init. */ static int adap_init0(struct adapter *adap) { @@ -3162,72 +3739,216 @@ static int adap_init0(struct adapter *adap) u32 v, port_vec; enum dev_state state; u32 params[7], val[7]; - struct fw_caps_config_cmd c; - - ret = t4_check_fw_version(adap); - if (ret == -EINVAL || ret > 0) { - if (upgrade_fw(adap) >= 0) /* recache FW version */ - ret = t4_check_fw_version(adap); - } - if (ret < 0) - return ret; + int reset = 1, j; - /* contact FW, request master */ - ret = t4_fw_hello(adap, adap->fn, adap->fn, MASTER_MUST, &state); + /* + * Contact FW, advertising Master capability (and potentially forcing + * ourselves as the Master PF if our module parameter force_init is + * set). + */ + ret = t4_fw_hello(adap, adap->mbox, adap->fn, + force_init ? MASTER_MUST : MASTER_MAY, + &state); if (ret < 0) { dev_err(adap->pdev_dev, "could not connect to FW, error %d\n", ret); return ret; } + if (ret == adap->mbox) + adap->flags |= MASTER_PF; + if (force_init && state == DEV_STATE_INIT) + state = DEV_STATE_UNINIT; - /* reset device */ - ret = t4_fw_reset(adap, adap->fn, PIORSTMODE | PIORST); - if (ret < 0) - goto bye; - - for (v = 0; v < SGE_NTIMERS - 1; v++) - adap->sge.timer_val[v] = min(intr_holdoff[v], MAX_SGE_TIMERVAL); - adap->sge.timer_val[SGE_NTIMERS - 1] = MAX_SGE_TIMERVAL; - adap->sge.counter_val[0] = 1; - for (v = 1; v < SGE_NCOUNTERS; v++) - adap->sge.counter_val[v] = min(intr_cnt[v - 1], - THRESHOLD_3_MASK); -#define FW_PARAM_DEV(param) \ - (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ - FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) + /* + * If we're the Master PF Driver and the device is uninitialized, + * then let's consider upgrading the firmware ... (We always want + * to check the firmware version number in order to A. get it for + * later reporting and B. to warn if the currently loaded firmware + * is excessively mismatched relative to the driver.) + */ + ret = t4_check_fw_version(adap); + if ((adap->flags & MASTER_PF) && state != DEV_STATE_INIT) { + if (ret == -EINVAL || ret > 0) { + if (upgrade_fw(adap) >= 0) { + /* + * Note that the chip was reset as part of the + * firmware upgrade so we don't reset it again + * below and grab the new firmware version. + */ + reset = 0; + ret = t4_check_fw_version(adap); + } + } + if (ret < 0) + return ret; + } - params[0] = FW_PARAM_DEV(CCLK); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 1, params, val); + /* + * Grab VPD parameters. This should be done after we establish a + * connection to the firmware since some of the VPD parameters + * (notably the Core Clock frequency) are retrieved via requests to + * the firmware. On the other hand, we need these fairly early on + * so we do this right after getting ahold of the firmware. + */ + ret = get_vpd_params(adap, &adap->params.vpd); if (ret < 0) goto bye; - adap->params.vpd.cclk = val[0]; - ret = adap_init1(adap, &c); + /* + * Find out what ports are available to us. Note that we need to do + * this before calling adap_init0_no_config() since it needs nports + * and portvec ... + */ + v = + FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_PORTVEC); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, &v, &port_vec); if (ret < 0) goto bye; + adap->params.nports = hweight32(port_vec); + adap->params.portvec = port_vec; + + /* + * If the firmware is initialized already (and we're not forcing a + * master initialization), note that we're living with existing + * adapter parameters. Otherwise, it's time to try initializing the + * adapter ... + */ + if (state == DEV_STATE_INIT) { + dev_info(adap->pdev_dev, "Coming up as %s: "\ + "Adapter already initialized\n", + adap->flags & MASTER_PF ? "MASTER" : "SLAVE"); + adap->flags |= USING_SOFT_PARAMS; + } else { + dev_info(adap->pdev_dev, "Coming up as MASTER: "\ + "Initializing adapter\n"); + + /* + * If the firmware doesn't support Configuration + * Files warn user and exit, + */ + if (ret < 0) + dev_warn(adap->pdev_dev, "Firmware doesn't support " + "configuration file.\n"); + if (force_old_init) + ret = adap_init0_no_config(adap, reset); + else { + /* + * Find out whether we're dealing with a version of + * the firmware which has configuration file support. + */ + params[0] = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CF)); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 1, + params, val); + + /* + * If the firmware doesn't support Configuration + * Files, use the old Driver-based, hard-wired + * initialization. Otherwise, try using the + * Configuration File support and fall back to the + * Driver-based initialization if there's no + * Configuration File found. + */ + if (ret < 0) + ret = adap_init0_no_config(adap, reset); + else { + /* + * The firmware provides us with a memory + * buffer where we can load a Configuration + * File from the host if we want to override + * the Configuration File in flash. + */ + + ret = adap_init0_config(adap, reset); + if (ret == -ENOENT) { + dev_info(adap->pdev_dev, + "No Configuration File present " + "on adapter. Using hard-wired " + "configuration parameters.\n"); + ret = adap_init0_no_config(adap, reset); + } + } + } + if (ret < 0) { + dev_err(adap->pdev_dev, + "could not initialize adapter, error %d\n", + -ret); + goto bye; + } + } + + /* + * If we're living with non-hard-coded parameters (either from a + * Firmware Configuration File or values programmed by a different PF + * Driver), give the SGE code a chance to pull in anything that it + * needs ... Note that this must be called after we retrieve our VPD + * parameters in order to know how to convert core ticks to seconds. + */ + if (adap->flags & USING_SOFT_PARAMS) { + ret = t4_sge_init(adap); + if (ret < 0) + goto bye; + } + + /* + * Grab some of our basic fundamental operating parameters. + */ +#define FW_PARAM_DEV(param) \ + (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) + #define FW_PARAM_PFVF(param) \ - (FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ - FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param) | \ - FW_PARAMS_PARAM_Y(adap->fn)) + FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)| \ + FW_PARAMS_PARAM_Y(0) | \ + FW_PARAMS_PARAM_Z(0) - params[0] = FW_PARAM_DEV(PORTVEC); + params[0] = FW_PARAM_PFVF(EQ_START); params[1] = FW_PARAM_PFVF(L2T_START); params[2] = FW_PARAM_PFVF(L2T_END); params[3] = FW_PARAM_PFVF(FILTER_START); params[4] = FW_PARAM_PFVF(FILTER_END); params[5] = FW_PARAM_PFVF(IQFLINT_START); - params[6] = FW_PARAM_PFVF(EQ_START); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 7, params, val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, params, val); if (ret < 0) goto bye; - port_vec = val[0]; + adap->sge.egr_start = val[0]; + adap->l2t_start = val[1]; + adap->l2t_end = val[2]; adap->tids.ftid_base = val[3]; adap->tids.nftids = val[4] - val[3] + 1; adap->sge.ingr_start = val[5]; - adap->sge.egr_start = val[6]; - if (c.ofldcaps) { + /* query params related to active filter region */ + params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START); + params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val); + /* If Active filter size is set we enable establishing + * offload connection through firmware work request + */ + if ((val[0] != val[1]) && (ret >= 0)) { + adap->flags |= FW_OFLD_CONN; + adap->tids.aftid_base = val[0]; + adap->tids.aftid_end = val[1]; + } + +#ifdef CONFIG_CHELSIO_T4_OFFLOAD + /* + * Get device capabilities so we can determine what resources we need + * to manage. + */ + memset(&caps_cmd, 0, sizeof(caps_cmd)); + caps_cmd.op_to_write = htonl(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | FW_CMD_READ); + caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd)); + ret = t4_wr_mbox(adap, adap->mbox, &caps_cmd, sizeof(caps_cmd), + &caps_cmd); + if (ret < 0) + goto bye; + + if (caps_cmd.ofldcaps) { /* query offload-related parameters */ params[0] = FW_PARAM_DEV(NTID); params[1] = FW_PARAM_PFVF(SERVER_START); @@ -3235,28 +3956,55 @@ static int adap_init0(struct adapter *adap) params[3] = FW_PARAM_PFVF(TDDP_START); params[4] = FW_PARAM_PFVF(TDDP_END); params[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params, - val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, + params, val); if (ret < 0) goto bye; adap->tids.ntids = val[0]; adap->tids.natids = min(adap->tids.ntids / 2, MAX_ATIDS); adap->tids.stid_base = val[1]; adap->tids.nstids = val[2] - val[1] + 1; + /* + * Setup server filter region. Divide the availble filter + * region into two parts. Regular filters get 1/3rd and server + * filters get 2/3rd part. This is only enabled if workarond + * path is enabled. + * 1. For regular filters. + * 2. Server filter: This are special filters which are used + * to redirect SYN packets to offload queue. + */ + if (adap->flags & FW_OFLD_CONN && !is_bypass(adap)) { + adap->tids.sftid_base = adap->tids.ftid_base + + DIV_ROUND_UP(adap->tids.nftids, 3); + adap->tids.nsftids = adap->tids.nftids - + DIV_ROUND_UP(adap->tids.nftids, 3); + adap->tids.nftids = adap->tids.sftid_base - + adap->tids.ftid_base; + } adap->vres.ddp.start = val[3]; adap->vres.ddp.size = val[4] - val[3] + 1; adap->params.ofldq_wr_cred = val[5]; + + params[0] = FW_PARAM_PFVF(ETHOFLD_START); + params[1] = FW_PARAM_PFVF(ETHOFLD_END); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, + params, val); + if ((val[0] != val[1]) && (ret >= 0)) { + adap->tids.uotid_base = val[0]; + adap->tids.nuotids = val[1] - val[0] + 1; + } + adap->params.offload = 1; } - if (c.rdmacaps) { + if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); params[1] = FW_PARAM_PFVF(STAG_END); params[2] = FW_PARAM_PFVF(RQ_START); params[3] = FW_PARAM_PFVF(RQ_END); params[4] = FW_PARAM_PFVF(PBL_START); params[5] = FW_PARAM_PFVF(PBL_END); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params, - val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 6, + params, val); if (ret < 0) goto bye; adap->vres.stag.start = val[0]; @@ -3272,8 +4020,7 @@ static int adap_init0(struct adapter *adap) params[3] = FW_PARAM_PFVF(CQ_END); params[4] = FW_PARAM_PFVF(OCQ_START); params[5] = FW_PARAM_PFVF(OCQ_END); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 6, params, - val); + ret = t4_query_params(adap, 0, 0, 0, 6, params, val); if (ret < 0) goto bye; adap->vres.qp.start = val[0]; @@ -3283,11 +4030,11 @@ static int adap_init0(struct adapter *adap) adap->vres.ocq.start = val[4]; adap->vres.ocq.size = val[5] - val[4] + 1; } - if (c.iscsicaps) { + if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); params[1] = FW_PARAM_PFVF(ISCSI_END); - ret = t4_query_params(adap, adap->fn, adap->fn, 0, 2, params, - val); + ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, + params, val); if (ret < 0) goto bye; adap->vres.iscsi.start = val[0]; @@ -3295,63 +4042,33 @@ static int adap_init0(struct adapter *adap) } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV +#endif /* CONFIG_CHELSIO_T4_OFFLOAD */ - adap->params.nports = hweight32(port_vec); - adap->params.portvec = port_vec; - adap->flags |= FW_OK; - - /* These are finalized by FW initialization, load their values now */ + /* + * These are finalized by FW initialization, load their values now. + */ v = t4_read_reg(adap, TP_TIMER_RESOLUTION); adap->params.tp.tre = TIMERRESOLUTION_GET(v); + adap->params.tp.dack_re = DELAYEDACKRESOLUTION_GET(v); t4_read_mtu_tbl(adap, adap->params.mtus, NULL); t4_load_mtus(adap, adap->params.mtus, adap->params.a_wnd, adap->params.b_wnd); -#ifdef CONFIG_PCI_IOV - /* - * Provision resource limits for Virtual Functions. We currently - * grant them all the same static resource limits except for the Port - * Access Rights Mask which we're assigning based on the PF. All of - * the static provisioning stuff for both the PF and VF really needs - * to be managed in a persistent manner for each device which the - * firmware controls. - */ - { - int pf, vf; - - for (pf = 0; pf < ARRAY_SIZE(num_vf); pf++) { - if (num_vf[pf] <= 0) - continue; + /* MODQ_REQ_MAP defaults to setting queues 0-3 to chan 0-3 */ + for (j = 0; j < NCHAN; j++) + adap->params.tp.tx_modq[j] = j; - /* VF numbering starts at 1! */ - for (vf = 1; vf <= num_vf[pf]; vf++) { - ret = t4_cfg_pfvf(adap, adap->fn, pf, vf, - VFRES_NEQ, VFRES_NETHCTRL, - VFRES_NIQFLINT, VFRES_NIQ, - VFRES_TC, VFRES_NVI, - FW_PFVF_CMD_CMASK_MASK, - pfvfres_pmask(adap, pf, vf), - VFRES_NEXACTF, - VFRES_R_CAPS, VFRES_WX_CAPS); - if (ret < 0) - dev_warn(adap->pdev_dev, "failed to " - "provision pf/vf=%d/%d; " - "err=%d\n", pf, vf, ret); - } - } - } -#endif - - setup_memwin(adap); + adap->flags |= FW_OK; return 0; /* - * If a command timed out or failed with EIO FW does not operate within - * its spec or something catastrophic happened to HW/FW, stop issuing - * commands. + * Something bad happened. If a command timed out or failed with EIO + * FW does not operate within its spec or something catastrophic + * happened to HW/FW, stop issuing commands. */ -bye: if (ret != -ETIMEDOUT && ret != -EIO) - t4_fw_bye(adap, adap->fn); +bye: + if (ret != -ETIMEDOUT && ret != -EIO) + t4_fw_bye(adap, adap->mbox); return ret; } @@ -3453,7 +4170,7 @@ static void eeh_resume(struct pci_dev *pdev) rtnl_unlock(); } -static struct pci_error_handlers cxgb4_eeh = { +static const struct pci_error_handlers cxgb4_eeh = { .error_detected = eeh_err_detected, .slot_reset = eeh_slot_reset, .resume = eeh_resume, @@ -3694,15 +4411,7 @@ static void __devinit print_port_info(const struct net_device *dev) static void __devinit enable_pcie_relaxed_ordering(struct pci_dev *dev) { - u16 v; - int pos; - - pos = pci_pcie_cap(dev); - if (pos > 0) { - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &v); - v |= PCI_EXP_DEVCTL_RELAX_EN; - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, v); - } + pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); } /* @@ -3814,7 +4523,9 @@ static int __devinit init_one(struct pci_dev *pdev, err = t4_prep_adapter(adapter); if (err) goto out_unmap_bar; + setup_memwin(adapter); err = adap_init0(adapter); + setup_memwin_rdma(adapter); if (err) goto out_unmap_bar; @@ -3956,8 +4667,11 @@ static void __devexit remove_one(struct pci_dev *pdev) { struct adapter *adapter = pci_get_drvdata(pdev); +#ifdef CONFIG_PCI_IOV pci_disable_sriov(pdev); +#endif + if (adapter) { int i; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index d79980c5fc6..1b899fea1a9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -100,6 +100,8 @@ struct tid_info { unsigned int nftids; unsigned int ftid_base; + unsigned int aftid_base; + unsigned int aftid_end; spinlock_t atid_lock ____cacheline_aligned_in_smp; union aopen_entry *afree; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index d49933ed551..3ecc087d732 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -68,9 +68,6 @@ */ #define RX_PKT_SKB_LEN 512 -/* Ethernet header padding prepended to RX_PKTs */ -#define RX_PKT_PAD 2 - /* * Max number of Tx descriptors we clean up at a time. Should be modest as * freeing skbs isn't cheap and it happens while holding locks. We just need @@ -137,13 +134,6 @@ */ #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN -enum { - /* packet alignment in FL buffers */ - FL_ALIGN = L1_CACHE_BYTES < 32 ? 32 : L1_CACHE_BYTES, - /* egress status entry size */ - STAT_LEN = L1_CACHE_BYTES > 64 ? 128 : 64 -}; - struct tx_sw_desc { /* SW state per Tx descriptor */ struct sk_buff *skb; struct ulptx_sgl *sgl; @@ -155,16 +145,57 @@ struct rx_sw_desc { /* SW state per Rx descriptor */ }; /* - * The low bits of rx_sw_desc.dma_addr have special meaning. + * Rx buffer sizes for "useskbs" Free List buffers (one ingress packet pe skb + * buffer). We currently only support two sizes for 1500- and 9000-byte MTUs. + * We could easily support more but there doesn't seem to be much need for + * that ... + */ +#define FL_MTU_SMALL 1500 +#define FL_MTU_LARGE 9000 + +static inline unsigned int fl_mtu_bufsize(struct adapter *adapter, + unsigned int mtu) +{ + struct sge *s = &adapter->sge; + + return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align); +} + +#define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL) +#define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE) + +/* + * Bits 0..3 of rx_sw_desc.dma_addr have special meaning. The hardware uses + * these to specify the buffer size as an index into the SGE Free List Buffer + * Size register array. We also use bit 4, when the buffer has been unmapped + * for DMA, but this is of course never sent to the hardware and is only used + * to prevent double unmappings. All of the above requires that the Free List + * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are + * 32-byte or or a power of 2 greater in alignment. Since the SGE's minimal + * Free List Buffer alignment is 32 bytes, this works out for us ... */ enum { - RX_LARGE_BUF = 1 << 0, /* buffer is larger than PAGE_SIZE */ - RX_UNMAPPED_BUF = 1 << 1, /* buffer is not mapped */ + RX_BUF_FLAGS = 0x1f, /* bottom five bits are special */ + RX_BUF_SIZE = 0x0f, /* bottom three bits are for buf sizes */ + RX_UNMAPPED_BUF = 0x10, /* buffer is not mapped */ + + /* + * XXX We shouldn't depend on being able to use these indices. + * XXX Especially when some other Master PF has initialized the + * XXX adapter or we use the Firmware Configuration File. We + * XXX should really search through the Host Buffer Size register + * XXX array for the appropriately sized buffer indices. + */ + RX_SMALL_PG_BUF = 0x0, /* small (PAGE_SIZE) page buffer */ + RX_LARGE_PG_BUF = 0x1, /* buffer large (FL_PG_ORDER) page buffer */ + + RX_SMALL_MTU_BUF = 0x2, /* small MTU buffer */ + RX_LARGE_MTU_BUF = 0x3, /* large MTU buffer */ }; static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d) { - return d->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF | RX_UNMAPPED_BUF); + return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS; } static inline bool is_buf_mapped(const struct rx_sw_desc *d) @@ -392,14 +423,35 @@ static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q, } } -static inline int get_buf_size(const struct rx_sw_desc *d) +static inline int get_buf_size(struct adapter *adapter, + const struct rx_sw_desc *d) { -#if FL_PG_ORDER > 0 - return (d->dma_addr & RX_LARGE_BUF) ? (PAGE_SIZE << FL_PG_ORDER) : - PAGE_SIZE; -#else - return PAGE_SIZE; -#endif + struct sge *s = &adapter->sge; + unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE; + int buf_size; + + switch (rx_buf_size_idx) { + case RX_SMALL_PG_BUF: + buf_size = PAGE_SIZE; + break; + + case RX_LARGE_PG_BUF: + buf_size = PAGE_SIZE << s->fl_pg_order; + break; + + case RX_SMALL_MTU_BUF: + buf_size = FL_MTU_SMALL_BUFSIZE(adapter); + break; + + case RX_LARGE_MTU_BUF: + buf_size = FL_MTU_LARGE_BUFSIZE(adapter); + break; + + default: + BUG_ON(1); + } + + return buf_size; } /** @@ -418,7 +470,8 @@ static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n) if (is_buf_mapped(d)) dma_unmap_page(adap->pdev_dev, get_buf_addr(d), - get_buf_size(d), PCI_DMA_FROMDEVICE); + get_buf_size(adap, d), + PCI_DMA_FROMDEVICE); put_page(d->page); d->page = NULL; if (++q->cidx == q->size) @@ -444,7 +497,7 @@ static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q) if (is_buf_mapped(d)) dma_unmap_page(adap->pdev_dev, get_buf_addr(d), - get_buf_size(d), PCI_DMA_FROMDEVICE); + get_buf_size(adap, d), PCI_DMA_FROMDEVICE); d->page = NULL; if (++q->cidx == q->size) q->cidx = 0; @@ -485,6 +538,7 @@ static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg, static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) { + struct sge *s = &adap->sge; struct page *pg; dma_addr_t mapping; unsigned int cred = q->avail; @@ -493,25 +547,27 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp |= __GFP_NOWARN | __GFP_COLD; -#if FL_PG_ORDER > 0 + if (s->fl_pg_order == 0) + goto alloc_small_pages; + /* * Prefer large buffers */ while (n) { - pg = alloc_pages(gfp | __GFP_COMP, FL_PG_ORDER); + pg = alloc_pages(gfp | __GFP_COMP, s->fl_pg_order); if (unlikely(!pg)) { q->large_alloc_failed++; break; /* fall back to single pages */ } mapping = dma_map_page(adap->pdev_dev, pg, 0, - PAGE_SIZE << FL_PG_ORDER, + PAGE_SIZE << s->fl_pg_order, PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) { - __free_pages(pg, FL_PG_ORDER); + __free_pages(pg, s->fl_pg_order); goto out; /* do not try small pages for this error */ } - mapping |= RX_LARGE_BUF; + mapping |= RX_LARGE_PG_BUF; *d++ = cpu_to_be64(mapping); set_rx_sw_desc(sd, pg, mapping); @@ -525,8 +581,8 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n, } n--; } -#endif +alloc_small_pages: while (n--) { pg = __skb_alloc_page(gfp, NULL); if (unlikely(!pg)) { @@ -769,8 +825,8 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) wmb(); /* write descriptors before telling HW */ spin_lock(&q->db_lock); if (!q->db_disabled) { - t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), - V_QID(q->cntxt_id) | V_PIDX(n)); + t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), + QID(q->cntxt_id) | PIDX(n)); } q->db_pidx = q->pidx; spin_unlock(&q->db_lock); @@ -1519,6 +1575,8 @@ static noinline int handle_trace_pkt(struct adapter *adap, static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, const struct cpl_rx_pkt *pkt) { + struct adapter *adapter = rxq->rspq.adap; + struct sge *s = &adapter->sge; int ret; struct sk_buff *skb; @@ -1529,8 +1587,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, return; } - copy_frags(skb, gl, RX_PKT_PAD); - skb->len = gl->tot_len - RX_PKT_PAD; + copy_frags(skb, gl, s->pktshift); + skb->len = gl->tot_len - s->pktshift; skb->data_len = skb->len; skb->truesize += skb->data_len; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1566,6 +1624,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, struct sk_buff *skb; const struct cpl_rx_pkt *pkt; struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); + struct sge *s = &q->adap->sge; if (unlikely(*(u8 *)rsp == CPL_TRACE_PKT)) return handle_trace_pkt(q->adap, si); @@ -1585,7 +1644,7 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, return 0; } - __skb_pull(skb, RX_PKT_PAD); /* remove ethernet header padding */ + __skb_pull(skb, s->pktshift); /* remove ethernet header padding */ skb->protocol = eth_type_trans(skb, q->netdev); skb_record_rx_queue(skb, q->idx); if (skb->dev->features & NETIF_F_RXHASH) @@ -1696,6 +1755,8 @@ static int process_responses(struct sge_rspq *q, int budget) int budget_left = budget; const struct rsp_ctrl *rc; struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); + struct adapter *adapter = q->adap; + struct sge *s = &adapter->sge; while (likely(budget_left)) { rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc)); @@ -1722,7 +1783,7 @@ static int process_responses(struct sge_rspq *q, int budget) /* gather packet fragments */ for (frags = 0, fp = si.frags; ; frags++, fp++) { rsd = &rxq->fl.sdesc[rxq->fl.cidx]; - bufsz = get_buf_size(rsd); + bufsz = get_buf_size(adapter, rsd); fp->page = rsd->page; fp->offset = q->offset; fp->size = min(bufsz, len); @@ -1747,7 +1808,7 @@ static int process_responses(struct sge_rspq *q, int budget) si.nfrags = frags + 1; ret = q->handler(q, q->cur_desc, &si); if (likely(ret == 0)) - q->offset += ALIGN(fp->size, FL_ALIGN); + q->offset += ALIGN(fp->size, s->fl_align); else restore_rx_bufs(&si, &rxq->fl, frags); } else if (likely(rsp_type == RSP_TYPE_CPL)) { @@ -1983,6 +2044,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, { int ret, flsz = 0; struct fw_iq_cmd c; + struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); /* Size needs to be multiple of 16, including status entry. */ @@ -2015,11 +2077,11 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->size = roundup(fl->size, 8); fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64), sizeof(struct rx_sw_desc), &fl->addr, - &fl->sdesc, STAT_LEN, NUMA_NO_NODE); + &fl->sdesc, s->stat_len, NUMA_NO_NODE); if (!fl->desc) goto fl_nomem; - flsz = fl->size / 8 + STAT_LEN / sizeof(struct tx_desc); + flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc); c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN | FW_IQ_CMD_FL0FETCHRO(1) | FW_IQ_CMD_FL0DATARO(1) | @@ -2096,14 +2158,15 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, { int ret, nentries; struct fw_eq_eth_cmd c; + struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); /* Add status entries */ - nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc); + nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size, sizeof(struct tx_desc), sizeof(struct tx_sw_desc), - &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN, + &txq->q.phys_addr, &txq->q.sdesc, s->stat_len, netdev_queue_numa_node_read(netdevq)); if (!txq->q.desc) return -ENOMEM; @@ -2149,10 +2212,11 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, { int ret, nentries; struct fw_eq_ctrl_cmd c; + struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); /* Add status entries */ - nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc); + nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); txq->q.desc = alloc_ring(adap->pdev_dev, nentries, sizeof(struct tx_desc), 0, &txq->q.phys_addr, @@ -2200,14 +2264,15 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, { int ret, nentries; struct fw_eq_ofld_cmd c; + struct sge *s = &adap->sge; struct port_info *pi = netdev_priv(dev); /* Add status entries */ - nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc); + nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size, sizeof(struct tx_desc), sizeof(struct tx_sw_desc), - &txq->q.phys_addr, &txq->q.sdesc, STAT_LEN, + &txq->q.phys_addr, &txq->q.sdesc, s->stat_len, NUMA_NO_NODE); if (!txq->q.desc) return -ENOMEM; @@ -2251,8 +2316,10 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, static void free_txq(struct adapter *adap, struct sge_txq *q) { + struct sge *s = &adap->sge; + dma_free_coherent(adap->pdev_dev, - q->size * sizeof(struct tx_desc) + STAT_LEN, + q->size * sizeof(struct tx_desc) + s->stat_len, q->desc, q->phys_addr); q->cntxt_id = 0; q->sdesc = NULL; @@ -2262,6 +2329,7 @@ static void free_txq(struct adapter *adap, struct sge_txq *q) static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl) { + struct sge *s = &adap->sge; unsigned int fl_id = fl ? fl->cntxt_id : 0xffff; adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL; @@ -2276,7 +2344,7 @@ static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, if (fl) { free_rx_bufs(adap, fl, fl->avail); - dma_free_coherent(adap->pdev_dev, fl->size * 8 + STAT_LEN, + dma_free_coherent(adap->pdev_dev, fl->size * 8 + s->stat_len, fl->desc, fl->addr); kfree(fl->sdesc); fl->sdesc = NULL; @@ -2408,18 +2476,112 @@ void t4_sge_stop(struct adapter *adap) * Performs SGE initialization needed every time after a chip reset. * We do not initialize any of the queues here, instead the driver * top-level must request them individually. + * + * Called in two different modes: + * + * 1. Perform actual hardware initialization and record hard-coded + * parameters which were used. This gets used when we're the + * Master PF and the Firmware Configuration File support didn't + * work for some reason. + * + * 2. We're not the Master PF or initialization was performed with + * a Firmware Configuration File. In this case we need to grab + * any of the SGE operating parameters that we need to have in + * order to do our job and make sure we can live with them ... */ -void t4_sge_init(struct adapter *adap) + +static int t4_sge_init_soft(struct adapter *adap) { - unsigned int i, v; struct sge *s = &adap->sge; - unsigned int fl_align_log = ilog2(FL_ALIGN); + u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu; + u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5; + u32 ingress_rx_threshold; - t4_set_reg_field(adap, SGE_CONTROL, PKTSHIFT_MASK | - INGPADBOUNDARY_MASK | EGRSTATUSPAGESIZE, - INGPADBOUNDARY(fl_align_log - 5) | PKTSHIFT(2) | - RXPKTCPLMODE | - (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0)); + /* + * Verify that CPL messages are going to the Ingress Queue for + * process_responses() and that only packet data is going to the + * Free Lists. + */ + if ((t4_read_reg(adap, SGE_CONTROL) & RXPKTCPLMODE_MASK) != + RXPKTCPLMODE(X_RXPKTCPLMODE_SPLIT)) { + dev_err(adap->pdev_dev, "bad SGE CPL MODE\n"); + return -EINVAL; + } + + /* + * Validate the Host Buffer Register Array indices that we want to + * use ... + * + * XXX Note that we should really read through the Host Buffer Size + * XXX register array and find the indices of the Buffer Sizes which + * XXX meet our needs! + */ + #define READ_FL_BUF(x) \ + t4_read_reg(adap, SGE_FL_BUFFER_SIZE0+(x)*sizeof(u32)) + + fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF); + fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF); + fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF); + fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF); + + #undef READ_FL_BUF + + if (fl_small_pg != PAGE_SIZE || + (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg || + (fl_large_pg & (fl_large_pg-1)) != 0))) { + dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n", + fl_small_pg, fl_large_pg); + return -EINVAL; + } + if (fl_large_pg) + s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT; + + if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap) || + fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) { + dev_err(adap->pdev_dev, "bad SGE FL MTU sizes [%d, %d]\n", + fl_small_mtu, fl_large_mtu); + return -EINVAL; + } + + /* + * Retrieve our RX interrupt holdoff timer values and counter + * threshold values from the SGE parameters. + */ + timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1); + timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3); + timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5); + s->timer_val[0] = core_ticks_to_us(adap, + TIMERVALUE0_GET(timer_value_0_and_1)); + s->timer_val[1] = core_ticks_to_us(adap, + TIMERVALUE1_GET(timer_value_0_and_1)); + s->timer_val[2] = core_ticks_to_us(adap, + TIMERVALUE2_GET(timer_value_2_and_3)); + s->timer_val[3] = core_ticks_to_us(adap, + TIMERVALUE3_GET(timer_value_2_and_3)); + s->timer_val[4] = core_ticks_to_us(adap, + TIMERVALUE4_GET(timer_value_4_and_5)); + s->timer_val[5] = core_ticks_to_us(adap, + TIMERVALUE5_GET(timer_value_4_and_5)); + + ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD); + s->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold); + s->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold); + s->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold); + s->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold); + + return 0; +} + +static int t4_sge_init_hard(struct adapter *adap) +{ + struct sge *s = &adap->sge; + + /* + * Set up our basic SGE mode to deliver CPL messages to our Ingress + * Queue and Packet Date to the Free List. + */ + t4_set_reg_field(adap, SGE_CONTROL, RXPKTCPLMODE_MASK, + RXPKTCPLMODE_MASK); /* * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows @@ -2433,13 +2595,24 @@ void t4_sge_init(struct adapter *adap) t4_set_reg_field(adap, A_SGE_DOORBELL_CONTROL, F_ENABLE_DROP, F_ENABLE_DROP); - for (i = v = 0; i < 32; i += 4) - v |= (PAGE_SHIFT - 10) << i; - t4_write_reg(adap, SGE_HOST_PAGE_SIZE, v); - t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, PAGE_SIZE); -#if FL_PG_ORDER > 0 - t4_write_reg(adap, SGE_FL_BUFFER_SIZE1, PAGE_SIZE << FL_PG_ORDER); -#endif + /* + * SGE_FL_BUFFER_SIZE0 (RX_SMALL_PG_BUF) is set up by + * t4_fixup_host_params(). + */ + s->fl_pg_order = FL_PG_ORDER; + if (s->fl_pg_order) + t4_write_reg(adap, + SGE_FL_BUFFER_SIZE0+RX_LARGE_PG_BUF*sizeof(u32), + PAGE_SIZE << FL_PG_ORDER); + t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_SMALL_MTU_BUF*sizeof(u32), + FL_MTU_SMALL_BUFSIZE(adap)); + t4_write_reg(adap, SGE_FL_BUFFER_SIZE0+RX_LARGE_MTU_BUF*sizeof(u32), + FL_MTU_LARGE_BUFSIZE(adap)); + + /* + * Note that the SGE Ingress Packet Count Interrupt Threshold and + * Timer Holdoff values must be supplied by our caller. + */ t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD, THRESHOLD_0(s->counter_val[0]) | THRESHOLD_1(s->counter_val[1]) | @@ -2449,14 +2622,54 @@ void t4_sge_init(struct adapter *adap) TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) | TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1]))); t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3, - TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[2])) | - TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[3]))); + TIMERVALUE2(us_to_core_ticks(adap, s->timer_val[2])) | + TIMERVALUE3(us_to_core_ticks(adap, s->timer_val[3]))); t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5, - TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[4])) | - TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[5]))); + TIMERVALUE4(us_to_core_ticks(adap, s->timer_val[4])) | + TIMERVALUE5(us_to_core_ticks(adap, s->timer_val[5]))); + + return 0; +} + +int t4_sge_init(struct adapter *adap) +{ + struct sge *s = &adap->sge; + u32 sge_control; + int ret; + + /* + * Ingress Padding Boundary and Egress Status Page Size are set up by + * t4_fixup_host_params(). + */ + sge_control = t4_read_reg(adap, SGE_CONTROL); + s->pktshift = PKTSHIFT_GET(sge_control); + s->stat_len = (sge_control & EGRSTATUSPAGESIZE_MASK) ? 128 : 64; + s->fl_align = 1 << (INGPADBOUNDARY_GET(sge_control) + + X_INGPADBOUNDARY_SHIFT); + + if (adap->flags & USING_SOFT_PARAMS) + ret = t4_sge_init_soft(adap); + else + ret = t4_sge_init_hard(adap); + if (ret < 0) + return ret; + + /* + * A FL with <= fl_starve_thres buffers is starving and a periodic + * timer will attempt to refill it. This needs to be larger than the + * SGE's Egress Congestion Threshold. If it isn't, then we can get + * stuck waiting for new packets while the SGE is waiting for us to + * give it more Free List entries. (Note that the SGE's Egress + * Congestion Threshold is in units of 2 Free List pointers.) + */ + s->fl_starve_thres + = EGRTHRESHOLD_GET(t4_read_reg(adap, SGE_CONM_CTRL))*2 + 1; + setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); s->starve_thres = core_ticks_per_usec(adap) * 1000000; /* 1 s */ s->idma_state[0] = s->idma_state[1] = 0; spin_lock_init(&s->intrq_lock); + + return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index fa947dfa4c3..35b81d8b59e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -120,6 +120,28 @@ static void t4_read_indirect(struct adapter *adap, unsigned int addr_reg, } } +/** + * t4_write_indirect - write indirectly addressed registers + * @adap: the adapter + * @addr_reg: register holding the indirect addresses + * @data_reg: register holding the value for the indirect registers + * @vals: values to write + * @nregs: how many indirect registers to write + * @start_idx: address of first indirect register to write + * + * Writes a sequential block of registers that are accessed indirectly + * through an address/data register pair. + */ +void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, + unsigned int data_reg, const u32 *vals, + unsigned int nregs, unsigned int start_idx) +{ + while (nregs--) { + t4_write_reg(adap, addr_reg, start_idx++); + t4_write_reg(adap, data_reg, *vals++); + } +} + /* * Get the reply to a mailbox command and store it in @rpl in big-endian order. */ @@ -330,6 +352,143 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) return 0; } +/* + * t4_mem_win_rw - read/write memory through PCIE memory window + * @adap: the adapter + * @addr: address of first byte requested + * @data: MEMWIN0_APERTURE bytes of data containing the requested address + * @dir: direction of transfer 1 => read, 0 => write + * + * Read/write MEMWIN0_APERTURE bytes of data from MC starting at a + * MEMWIN0_APERTURE-byte-aligned address that covers the requested + * address @addr. + */ +static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir) +{ + int i; + + /* + * Setup offset into PCIE memory window. Address must be a + * MEMWIN0_APERTURE-byte-aligned address. (Read back MA register to + * ensure that changes propagate before we attempt to use the new + * values.) + */ + t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, + addr & ~(MEMWIN0_APERTURE - 1)); + t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); + + /* Collecting data 4 bytes at a time upto MEMWIN0_APERTURE */ + for (i = 0; i < MEMWIN0_APERTURE; i = i+0x4) { + if (dir) + *data++ = t4_read_reg(adap, (MEMWIN0_BASE + i)); + else + t4_write_reg(adap, (MEMWIN0_BASE + i), *data++); + } + + return 0; +} + +/** + * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window + * @adap: the adapter + * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC + * @addr: address within indicated memory type + * @len: amount of memory to transfer + * @buf: host memory buffer + * @dir: direction of transfer 1 => read, 0 => write + * + * Reads/writes an [almost] arbitrary memory region in the firmware: the + * firmware memory address, length and host buffer must be aligned on + * 32-bit boudaries. The memory is transferred as a raw byte sequence + * from/to the firmware's memory. If this memory contains data + * structures which contain multi-byte integers, it's the callers + * responsibility to perform appropriate byte order conversions. + */ +static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, + __be32 *buf, int dir) +{ + u32 pos, start, end, offset, memoffset; + int ret; + + /* + * Argument sanity checks ... + */ + if ((addr & 0x3) || (len & 0x3)) + return -EINVAL; + + /* + * Offset into the region of memory which is being accessed + * MEM_EDC0 = 0 + * MEM_EDC1 = 1 + * MEM_MC = 2 + */ + memoffset = (mtype * (5 * 1024 * 1024)); + + /* Determine the PCIE_MEM_ACCESS_OFFSET */ + addr = addr + memoffset; + + /* + * The underlaying EDC/MC read routines read MEMWIN0_APERTURE bytes + * at a time so we need to round down the start and round up the end. + * We'll start copying out of the first line at (addr - start) a word + * at a time. + */ + start = addr & ~(MEMWIN0_APERTURE-1); + end = (addr + len + MEMWIN0_APERTURE-1) & ~(MEMWIN0_APERTURE-1); + offset = (addr - start)/sizeof(__be32); + + for (pos = start; pos < end; pos += MEMWIN0_APERTURE, offset = 0) { + __be32 data[MEMWIN0_APERTURE/sizeof(__be32)]; + + /* + * If we're writing, copy the data from the caller's memory + * buffer + */ + if (!dir) { + /* + * If we're doing a partial write, then we need to do + * a read-modify-write ... + */ + if (offset || len < MEMWIN0_APERTURE) { + ret = t4_mem_win_rw(adap, pos, data, 1); + if (ret) + return ret; + } + while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && + len > 0) { + data[offset++] = *buf++; + len -= sizeof(__be32); + } + } + + /* + * Transfer a block of memory and bail if there's an error. + */ + ret = t4_mem_win_rw(adap, pos, data, dir); + if (ret) + return ret; + + /* + * If we're reading, copy the data into the caller's memory + * buffer. + */ + if (dir) + while (offset < (MEMWIN0_APERTURE/sizeof(__be32)) && + len > 0) { + *buf++ = data[offset++]; + len -= sizeof(__be32); + } + } + + return 0; +} + +int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, + __be32 *buf) +{ + return t4_memory_rw(adap, mtype, addr, len, buf, 0); +} + #define EEPROM_STAT_ADDR 0x7bfc #define VPD_BASE 0 #define VPD_LEN 512 @@ -355,8 +514,9 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable) * * Reads card parameters stored in VPD EEPROM. */ -static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) +int get_vpd_params(struct adapter *adapter, struct vpd_params *p) { + u32 cclk_param, cclk_val; int i, ret; int ec, sn; u8 vpd[VPD_LEN], csum; @@ -418,6 +578,19 @@ static int get_vpd_params(struct adapter *adapter, struct vpd_params *p) i = pci_vpd_info_field_size(vpd + sn - PCI_VPD_INFO_FLD_HDR_SIZE); memcpy(p->sn, vpd + sn, min(i, SERNUM_LEN)); strim(p->sn); + + /* + * Ask firmware for the Core Clock since it knows how to translate the + * Reference Clock ('V2') VPD field into a Core Clock value ... + */ + cclk_param = (FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_CCLK)); + ret = t4_query_params(adapter, adapter->mbox, 0, 0, + 1, &cclk_param, &cclk_val); + if (ret) + return ret; + p->cclk = cclk_val; + return 0; } @@ -718,6 +891,77 @@ static int t4_flash_erase_sectors(struct adapter *adapter, int start, int end) } /** + * t4_flash_cfg_addr - return the address of the flash configuration file + * @adapter: the adapter + * + * Return the address within the flash where the Firmware Configuration + * File is stored. + */ +unsigned int t4_flash_cfg_addr(struct adapter *adapter) +{ + if (adapter->params.sf_size == 0x100000) + return FLASH_FPGA_CFG_START; + else + return FLASH_CFG_START; +} + +/** + * t4_load_cfg - download config file + * @adap: the adapter + * @cfg_data: the cfg text file to write + * @size: text file size + * + * Write the supplied config text file to the card's serial flash. + */ +int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size) +{ + int ret, i, n; + unsigned int addr; + unsigned int flash_cfg_start_sec; + unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; + + addr = t4_flash_cfg_addr(adap); + flash_cfg_start_sec = addr / SF_SEC_SIZE; + + if (size > FLASH_CFG_MAX_SIZE) { + dev_err(adap->pdev_dev, "cfg file too large, max is %u bytes\n", + FLASH_CFG_MAX_SIZE); + return -EFBIG; + } + + i = DIV_ROUND_UP(FLASH_CFG_MAX_SIZE, /* # of sectors spanned */ + sf_sec_size); + ret = t4_flash_erase_sectors(adap, flash_cfg_start_sec, + flash_cfg_start_sec + i - 1); + /* + * If size == 0 then we're simply erasing the FLASH sectors associated + * with the on-adapter Firmware Configuration File. + */ + if (ret || size == 0) + goto out; + + /* this will write to the flash up to SF_PAGE_SIZE at a time */ + for (i = 0; i < size; i += SF_PAGE_SIZE) { + if ((size - i) < SF_PAGE_SIZE) + n = size - i; + else + n = SF_PAGE_SIZE; + ret = t4_write_flash(adap, addr, n, cfg_data); + if (ret) + goto out; + + addr += SF_PAGE_SIZE; + cfg_data += SF_PAGE_SIZE; + } + +out: + if (ret) + dev_err(adap->pdev_dev, "config file %s failed %d\n", + (size == 0 ? "clear" : "download"), ret); + return ret; +} + +/** * t4_load_fw - download firmware * @adap: the adapter * @fw_data: the firmware image to write @@ -1018,9 +1262,9 @@ static void sge_intr_handler(struct adapter *adapter) { ERR_INVALID_CIDX_INC, "SGE GTS CIDX increment too large", -1, 0 }, { ERR_CPL_OPCODE_0, "SGE received 0-length CPL", -1, 0 }, - { F_DBFIFO_LP_INT, NULL, -1, 0, t4_db_full }, - { F_DBFIFO_HP_INT, NULL, -1, 0, t4_db_full }, - { F_ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped }, + { DBFIFO_LP_INT, NULL, -1, 0, t4_db_full }, + { DBFIFO_HP_INT, NULL, -1, 0, t4_db_full }, + { ERR_DROPPED_DB, NULL, -1, 0, t4_db_dropped }, { ERR_DATA_CPL_ON_HIGH_QID1 | ERR_DATA_CPL_ON_HIGH_QID0, "SGE IQID > 1023 received CPL for FL", -1, 0 }, { ERR_BAD_DB_PIDX3, "SGE DBP 3 pidx increment too large", -1, @@ -1520,7 +1764,7 @@ void t4_intr_enable(struct adapter *adapter) ERR_BAD_DB_PIDX2 | ERR_BAD_DB_PIDX1 | ERR_BAD_DB_PIDX0 | ERR_ING_CTXT_PRIO | ERR_EGR_CTXT_PRIO | INGRESS_SIZE_ERR | - F_DBFIFO_HP_INT | F_DBFIFO_LP_INT | + DBFIFO_HP_INT | DBFIFO_LP_INT | EGRESS_SIZE_ERR); t4_write_reg(adapter, MYPF_REG(PL_PF_INT_ENABLE), PF_INTR_MASK); t4_set_reg_field(adapter, PL_INT_MAP0, 0, 1 << pf); @@ -1717,6 +1961,23 @@ void t4_read_mtu_tbl(struct adapter *adap, u16 *mtus, u8 *mtu_log) } /** + * t4_tp_wr_bits_indirect - set/clear bits in an indirect TP register + * @adap: the adapter + * @addr: the indirect TP register address + * @mask: specifies the field within the register to modify + * @val: new value for the field + * + * Sets a field of an indirect TP register to the given value. + */ +void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr, + unsigned int mask, unsigned int val) +{ + t4_write_reg(adap, TP_PIO_ADDR, addr); + val |= t4_read_reg(adap, TP_PIO_DATA) & ~mask; + t4_write_reg(adap, TP_PIO_DATA, val); +} + +/** * init_cong_ctrl - initialize congestion control parameters * @a: the alpha values for congestion control * @b: the beta values for congestion control @@ -2000,9 +2261,9 @@ int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, struct fw_ldst_cmd c; memset(&c, 0, sizeof(c)); - c.op_to_addrspace = htonl(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | - F_FW_CMD_WRITE | - V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE)); + c.op_to_addrspace = htonl(FW_CMD_OP(FW_LDST_CMD) | FW_CMD_REQUEST | + FW_CMD_WRITE | + FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FIRMWARE)); c.cycles_to_len16 = htonl(FW_LEN16(c)); c.u.addrval.addr = htonl(addr); c.u.addrval.val = htonl(val); @@ -2033,8 +2294,8 @@ int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len) if ((addr & 3) || (len + off) > MEMWIN0_APERTURE) return -EINVAL; - t4_write_reg(adap, A_PCIE_MEM_ACCESS_OFFSET, addr & ~15); - t4_read_reg(adap, A_PCIE_MEM_ACCESS_OFFSET); + t4_write_reg(adap, PCIE_MEM_ACCESS_OFFSET, addr & ~15); + t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); for (i = 0; i < len; i += 4) *data++ = t4_read_reg(adap, (MEMWIN0_BASE + off + i)); @@ -2102,39 +2363,129 @@ int t4_mdio_wr(struct adapter *adap, unsigned int mbox, unsigned int phy_addr, } /** - * t4_fw_hello - establish communication with FW - * @adap: the adapter - * @mbox: mailbox to use for the FW command - * @evt_mbox: mailbox to receive async FW events - * @master: specifies the caller's willingness to be the device master - * @state: returns the current device state + * t4_fw_hello - establish communication with FW + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @evt_mbox: mailbox to receive async FW events + * @master: specifies the caller's willingness to be the device master + * @state: returns the current device state (if non-NULL) * - * Issues a command to establish communication with FW. + * Issues a command to establish communication with FW. Returns either + * an error (negative integer) or the mailbox of the Master PF. */ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox, enum dev_master master, enum dev_state *state) { int ret; struct fw_hello_cmd c; + u32 v; + unsigned int master_mbox; + int retries = FW_CMD_HELLO_RETRIES; +retry: + memset(&c, 0, sizeof(c)); INIT_CMD(c, HELLO, WRITE); c.err_to_mbasyncnot = htonl( FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) | FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) | - FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox : 0xff) | - FW_HELLO_CMD_MBASYNCNOT(evt_mbox)); + FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox : + FW_HELLO_CMD_MBMASTER_MASK) | + FW_HELLO_CMD_MBASYNCNOT(evt_mbox) | + FW_HELLO_CMD_STAGE(fw_hello_cmd_stage_os) | + FW_HELLO_CMD_CLEARINIT); + /* + * Issue the HELLO command to the firmware. If it's not successful + * but indicates that we got a "busy" or "timeout" condition, retry + * the HELLO until we exhaust our retry limit. + */ ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c); - if (ret == 0 && state) { - u32 v = ntohl(c.err_to_mbasyncnot); - if (v & FW_HELLO_CMD_INIT) - *state = DEV_STATE_INIT; - else if (v & FW_HELLO_CMD_ERR) + if (ret < 0) { + if ((ret == -EBUSY || ret == -ETIMEDOUT) && retries-- > 0) + goto retry; + return ret; + } + + v = ntohl(c.err_to_mbasyncnot); + master_mbox = FW_HELLO_CMD_MBMASTER_GET(v); + if (state) { + if (v & FW_HELLO_CMD_ERR) *state = DEV_STATE_ERR; + else if (v & FW_HELLO_CMD_INIT) + *state = DEV_STATE_INIT; else *state = DEV_STATE_UNINIT; } - return ret; + + /* + * If we're not the Master PF then we need to wait around for the + * Master PF Driver to finish setting up the adapter. + * + * Note that we also do this wait if we're a non-Master-capable PF and + * there is no current Master PF; a Master PF may show up momentarily + * and we wouldn't want to fail pointlessly. (This can happen when an + * OS loads lots of different drivers rapidly at the same time). In + * this case, the Master PF returned by the firmware will be + * FW_PCIE_FW_MASTER_MASK so the test below will work ... + */ + if ((v & (FW_HELLO_CMD_ERR|FW_HELLO_CMD_INIT)) == 0 && + master_mbox != mbox) { + int waiting = FW_CMD_HELLO_TIMEOUT; + + /* + * Wait for the firmware to either indicate an error or + * initialized state. If we see either of these we bail out + * and report the issue to the caller. If we exhaust the + * "hello timeout" and we haven't exhausted our retries, try + * again. Otherwise bail with a timeout error. + */ + for (;;) { + u32 pcie_fw; + + msleep(50); + waiting -= 50; + + /* + * If neither Error nor Initialialized are indicated + * by the firmware keep waiting till we exaust our + * timeout ... and then retry if we haven't exhausted + * our retries ... + */ + pcie_fw = t4_read_reg(adap, MA_PCIE_FW); + if (!(pcie_fw & (FW_PCIE_FW_ERR|FW_PCIE_FW_INIT))) { + if (waiting <= 0) { + if (retries-- > 0) + goto retry; + + return -ETIMEDOUT; + } + continue; + } + + /* + * We either have an Error or Initialized condition + * report errors preferentially. + */ + if (state) { + if (pcie_fw & FW_PCIE_FW_ERR) + *state = DEV_STATE_ERR; + else if (pcie_fw & FW_PCIE_FW_INIT) + *state = DEV_STATE_INIT; + } + + /* + * If we arrived before a Master PF was selected and + * there's not a valid Master PF, grab its identity + * for our caller. + */ + if (master_mbox == FW_PCIE_FW_MASTER_MASK && + (pcie_fw & FW_PCIE_FW_MASTER_VLD)) + master_mbox = FW_PCIE_FW_MASTER_GET(pcie_fw); + break; + } + } + + return master_mbox; } /** @@ -2186,6 +2537,334 @@ int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset) } /** + * t4_fw_halt - issue a reset/halt to FW and put uP into RESET + * @adap: the adapter + * @mbox: mailbox to use for the FW RESET command (if desired) + * @force: force uP into RESET even if FW RESET command fails + * + * Issues a RESET command to firmware (if desired) with a HALT indication + * and then puts the microprocessor into RESET state. The RESET command + * will only be issued if a legitimate mailbox is provided (mbox <= + * FW_PCIE_FW_MASTER_MASK). + * + * This is generally used in order for the host to safely manipulate the + * adapter without fear of conflicting with whatever the firmware might + * be doing. The only way out of this state is to RESTART the firmware + * ... + */ +int t4_fw_halt(struct adapter *adap, unsigned int mbox, int force) +{ + int ret = 0; + + /* + * If a legitimate mailbox is provided, issue a RESET command + * with a HALT indication. + */ + if (mbox <= FW_PCIE_FW_MASTER_MASK) { + struct fw_reset_cmd c; + + memset(&c, 0, sizeof(c)); + INIT_CMD(c, RESET, WRITE); + c.val = htonl(PIORST | PIORSTMODE); + c.halt_pkd = htonl(FW_RESET_CMD_HALT(1U)); + ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); + } + + /* + * Normally we won't complete the operation if the firmware RESET + * command fails but if our caller insists we'll go ahead and put the + * uP into RESET. This can be useful if the firmware is hung or even + * missing ... We'll have to take the risk of putting the uP into + * RESET without the cooperation of firmware in that case. + * + * We also force the firmware's HALT flag to be on in case we bypassed + * the firmware RESET command above or we're dealing with old firmware + * which doesn't have the HALT capability. This will serve as a flag + * for the incoming firmware to know that it's coming out of a HALT + * rather than a RESET ... if it's new enough to understand that ... + */ + if (ret == 0 || force) { + t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, UPCRST); + t4_set_reg_field(adap, PCIE_FW, FW_PCIE_FW_HALT, + FW_PCIE_FW_HALT); + } + + /* + * And we always return the result of the firmware RESET command + * even when we force the uP into RESET ... + */ + return ret; +} + +/** + * t4_fw_restart - restart the firmware by taking the uP out of RESET + * @adap: the adapter + * @reset: if we want to do a RESET to restart things + * + * Restart firmware previously halted by t4_fw_halt(). On successful + * return the previous PF Master remains as the new PF Master and there + * is no need to issue a new HELLO command, etc. + * + * We do this in two ways: + * + * 1. If we're dealing with newer firmware we'll simply want to take + * the chip's microprocessor out of RESET. This will cause the + * firmware to start up from its start vector. And then we'll loop + * until the firmware indicates it's started again (PCIE_FW.HALT + * reset to 0) or we timeout. + * + * 2. If we're dealing with older firmware then we'll need to RESET + * the chip since older firmware won't recognize the PCIE_FW.HALT + * flag and automatically RESET itself on startup. + */ +int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset) +{ + if (reset) { + /* + * Since we're directing the RESET instead of the firmware + * doing it automatically, we need to clear the PCIE_FW.HALT + * bit. + */ + t4_set_reg_field(adap, PCIE_FW, FW_PCIE_FW_HALT, 0); + + /* + * If we've been given a valid mailbox, first try to get the + * firmware to do the RESET. If that works, great and we can + * return success. Otherwise, if we haven't been given a + * valid mailbox or the RESET command failed, fall back to + * hitting the chip with a hammer. + */ + if (mbox <= FW_PCIE_FW_MASTER_MASK) { + t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, 0); + msleep(100); + if (t4_fw_reset(adap, mbox, + PIORST | PIORSTMODE) == 0) + return 0; + } + + t4_write_reg(adap, PL_RST, PIORST | PIORSTMODE); + msleep(2000); + } else { + int ms; + + t4_set_reg_field(adap, CIM_BOOT_CFG, UPCRST, 0); + for (ms = 0; ms < FW_CMD_MAX_TIMEOUT; ) { + if (!(t4_read_reg(adap, PCIE_FW) & FW_PCIE_FW_HALT)) + return 0; + msleep(100); + ms += 100; + } + return -ETIMEDOUT; + } + return 0; +} + +/** + * t4_fw_upgrade - perform all of the steps necessary to upgrade FW + * @adap: the adapter + * @mbox: mailbox to use for the FW RESET command (if desired) + * @fw_data: the firmware image to write + * @size: image size + * @force: force upgrade even if firmware doesn't cooperate + * + * Perform all of the steps necessary for upgrading an adapter's + * firmware image. Normally this requires the cooperation of the + * existing firmware in order to halt all existing activities + * but if an invalid mailbox token is passed in we skip that step + * (though we'll still put the adapter microprocessor into RESET in + * that case). + * + * On successful return the new firmware will have been loaded and + * the adapter will have been fully RESET losing all previous setup + * state. On unsuccessful return the adapter may be completely hosed ... + * positive errno indicates that the adapter is ~probably~ intact, a + * negative errno indicates that things are looking bad ... + */ +int t4_fw_upgrade(struct adapter *adap, unsigned int mbox, + const u8 *fw_data, unsigned int size, int force) +{ + const struct fw_hdr *fw_hdr = (const struct fw_hdr *)fw_data; + int reset, ret; + + ret = t4_fw_halt(adap, mbox, force); + if (ret < 0 && !force) + return ret; + + ret = t4_load_fw(adap, fw_data, size); + if (ret < 0) + return ret; + + /* + * Older versions of the firmware don't understand the new + * PCIE_FW.HALT flag and so won't know to perform a RESET when they + * restart. So for newly loaded older firmware we'll have to do the + * RESET for it so it starts up on a clean slate. We can tell if + * the newly loaded firmware will handle this right by checking + * its header flags to see if it advertises the capability. + */ + reset = ((ntohl(fw_hdr->flags) & FW_HDR_FLAGS_RESET_HALT) == 0); + return t4_fw_restart(adap, mbox, reset); +} + + +/** + * t4_fw_config_file - setup an adapter via a Configuration File + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * @mtype: the memory type where the Configuration File is located + * @maddr: the memory address where the Configuration File is located + * @finiver: return value for CF [fini] version + * @finicsum: return value for CF [fini] checksum + * @cfcsum: return value for CF computed checksum + * + * Issue a command to get the firmware to process the Configuration + * File located at the specified mtype/maddress. If the Configuration + * File is processed successfully and return value pointers are + * provided, the Configuration File "[fini] section version and + * checksum values will be returned along with the computed checksum. + * It's up to the caller to decide how it wants to respond to the + * checksums not matching but it recommended that a prominant warning + * be emitted in order to help people rapidly identify changed or + * corrupted Configuration Files. + * + * Also note that it's possible to modify things like "niccaps", + * "toecaps",etc. between processing the Configuration File and telling + * the firmware to use the new configuration. Callers which want to + * do this will need to "hand-roll" their own CAPS_CONFIGS commands for + * Configuration Files if they want to do this. + */ +int t4_fw_config_file(struct adapter *adap, unsigned int mbox, + unsigned int mtype, unsigned int maddr, + u32 *finiver, u32 *finicsum, u32 *cfcsum) +{ + struct fw_caps_config_cmd caps_cmd; + int ret; + + /* + * Tell the firmware to process the indicated Configuration File. + * If there are no errors and the caller has provided return value + * pointers for the [fini] section version, checksum and computed + * checksum, pass those back to the caller. + */ + memset(&caps_cmd, 0, sizeof(caps_cmd)); + caps_cmd.op_to_write = + htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | + FW_CMD_READ); + caps_cmd.retval_len16 = + htonl(FW_CAPS_CONFIG_CMD_CFVALID | + FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | + FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(maddr >> 16) | + FW_LEN16(caps_cmd)); + ret = t4_wr_mbox(adap, mbox, &caps_cmd, sizeof(caps_cmd), &caps_cmd); + if (ret < 0) + return ret; + + if (finiver) + *finiver = ntohl(caps_cmd.finiver); + if (finicsum) + *finicsum = ntohl(caps_cmd.finicsum); + if (cfcsum) + *cfcsum = ntohl(caps_cmd.cfcsum); + + /* + * And now tell the firmware to use the configuration we just loaded. + */ + caps_cmd.op_to_write = + htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) | + FW_CMD_REQUEST | + FW_CMD_WRITE); + caps_cmd.retval_len16 = htonl(FW_LEN16(caps_cmd)); + return t4_wr_mbox(adap, mbox, &caps_cmd, sizeof(caps_cmd), NULL); +} + +/** + * t4_fixup_host_params - fix up host-dependent parameters + * @adap: the adapter + * @page_size: the host's Base Page Size + * @cache_line_size: the host's Cache Line Size + * + * Various registers in T4 contain values which are dependent on the + * host's Base Page and Cache Line Sizes. This function will fix all of + * those registers with the appropriate values as passed in ... + */ +int t4_fixup_host_params(struct adapter *adap, unsigned int page_size, + unsigned int cache_line_size) +{ + unsigned int page_shift = fls(page_size) - 1; + unsigned int sge_hps = page_shift - 10; + unsigned int stat_len = cache_line_size > 64 ? 128 : 64; + unsigned int fl_align = cache_line_size < 32 ? 32 : cache_line_size; + unsigned int fl_align_log = fls(fl_align) - 1; + + t4_write_reg(adap, SGE_HOST_PAGE_SIZE, + HOSTPAGESIZEPF0(sge_hps) | + HOSTPAGESIZEPF1(sge_hps) | + HOSTPAGESIZEPF2(sge_hps) | + HOSTPAGESIZEPF3(sge_hps) | + HOSTPAGESIZEPF4(sge_hps) | + HOSTPAGESIZEPF5(sge_hps) | + HOSTPAGESIZEPF6(sge_hps) | + HOSTPAGESIZEPF7(sge_hps)); + + t4_set_reg_field(adap, SGE_CONTROL, + INGPADBOUNDARY(INGPADBOUNDARY_MASK) | + EGRSTATUSPAGESIZE_MASK, + INGPADBOUNDARY(fl_align_log - 5) | + EGRSTATUSPAGESIZE(stat_len != 64)); + + /* + * Adjust various SGE Free List Host Buffer Sizes. + * + * This is something of a crock since we're using fixed indices into + * the array which are also known by the sge.c code and the T4 + * Firmware Configuration File. We need to come up with a much better + * approach to managing this array. For now, the first four entries + * are: + * + * 0: Host Page Size + * 1: 64KB + * 2: Buffer size corresponding to 1500 byte MTU (unpacked mode) + * 3: Buffer size corresponding to 9000 byte MTU (unpacked mode) + * + * For the single-MTU buffers in unpacked mode we need to include + * space for the SGE Control Packet Shift, 14 byte Ethernet header, + * possible 4 byte VLAN tag, all rounded up to the next Ingress Packet + * Padding boundry. All of these are accommodated in the Factory + * Default Firmware Configuration File but we need to adjust it for + * this host's cache line size. + */ + t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, page_size); + t4_write_reg(adap, SGE_FL_BUFFER_SIZE2, + (t4_read_reg(adap, SGE_FL_BUFFER_SIZE2) + fl_align-1) + & ~(fl_align-1)); + t4_write_reg(adap, SGE_FL_BUFFER_SIZE3, + (t4_read_reg(adap, SGE_FL_BUFFER_SIZE3) + fl_align-1) + & ~(fl_align-1)); + + t4_write_reg(adap, ULP_RX_TDDP_PSZ, HPZ0(page_shift - 12)); + + return 0; +} + +/** + * t4_fw_initialize - ask FW to initialize the device + * @adap: the adapter + * @mbox: mailbox to use for the FW command + * + * Issues a command to FW to partially initialize the device. This + * performs initialization that generally doesn't depend on user input. + */ +int t4_fw_initialize(struct adapter *adap, unsigned int mbox) +{ + struct fw_initialize_cmd c; + + memset(&c, 0, sizeof(c)); + INIT_CMD(c, INITIALIZE, WRITE); + return t4_wr_mbox(adap, mbox, &c, sizeof(c), NULL); +} + +/** * t4_query_params - query FW or device parameters * @adap: the adapter * @mbox: mailbox to use for the FW command @@ -2741,11 +3420,9 @@ static void __devinit get_pci_mode(struct adapter *adapter, struct pci_params *p) { u16 val; - u32 pcie_cap = pci_pcie_cap(adapter->pdev); - if (pcie_cap) { - pci_read_config_word(adapter->pdev, pcie_cap + PCI_EXP_LNKSTA, - &val); + if (pci_is_pcie(adapter->pdev)) { + pcie_capability_read_word(adapter->pdev, PCI_EXP_LNKSTA, &val); p->speed = val & PCI_EXP_LNKSTA_CLS; p->width = (val & PCI_EXP_LNKSTA_NLW) >> 4; } @@ -2837,10 +3514,6 @@ int __devinit t4_prep_adapter(struct adapter *adapter) return ret; } - ret = get_vpd_params(adapter, &adapter->params.vpd); - if (ret < 0) - return ret; - init_cong_ctrl(adapter->params.a_wnd, adapter->params.b_wnd); /* @@ -2848,6 +3521,7 @@ int __devinit t4_prep_adapter(struct adapter *adapter) */ adapter->params.nports = 1; adapter->params.portvec = 1; + adapter->params.vpd.cclk = 50000; return 0; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index c26b455f37d..f534ed7e10e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -58,6 +58,7 @@ enum { enum { SF_PAGE_SIZE = 256, /* serial flash page size */ + SF_SEC_SIZE = 64 * 1024, /* serial flash sector size */ }; enum { RSP_TYPE_FLBUF, RSP_TYPE_CPL, RSP_TYPE_INTR }; /* response entry types */ @@ -137,4 +138,83 @@ struct rsp_ctrl { #define QINTR_CNT_EN 0x1 #define QINTR_TIMER_IDX(x) ((x) << 1) #define QINTR_TIMER_IDX_GET(x) (((x) >> 1) & 0x7) + +/* + * Flash layout. + */ +#define FLASH_START(start) ((start) * SF_SEC_SIZE) +#define FLASH_MAX_SIZE(nsecs) ((nsecs) * SF_SEC_SIZE) + +enum { + /* + * Various Expansion-ROM boot images, etc. + */ + FLASH_EXP_ROM_START_SEC = 0, + FLASH_EXP_ROM_NSECS = 6, + FLASH_EXP_ROM_START = FLASH_START(FLASH_EXP_ROM_START_SEC), + FLASH_EXP_ROM_MAX_SIZE = FLASH_MAX_SIZE(FLASH_EXP_ROM_NSECS), + + /* + * iSCSI Boot Firmware Table (iBFT) and other driver-related + * parameters ... + */ + FLASH_IBFT_START_SEC = 6, + FLASH_IBFT_NSECS = 1, + FLASH_IBFT_START = FLASH_START(FLASH_IBFT_START_SEC), + FLASH_IBFT_MAX_SIZE = FLASH_MAX_SIZE(FLASH_IBFT_NSECS), + + /* + * Boot configuration data. + */ + FLASH_BOOTCFG_START_SEC = 7, + FLASH_BOOTCFG_NSECS = 1, + FLASH_BOOTCFG_START = FLASH_START(FLASH_BOOTCFG_START_SEC), + FLASH_BOOTCFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_BOOTCFG_NSECS), + + /* + * Location of firmware image in FLASH. + */ + FLASH_FW_START_SEC = 8, + FLASH_FW_NSECS = 8, + FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC), + FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS), + + /* + * iSCSI persistent/crash information. + */ + FLASH_ISCSI_CRASH_START_SEC = 29, + FLASH_ISCSI_CRASH_NSECS = 1, + FLASH_ISCSI_CRASH_START = FLASH_START(FLASH_ISCSI_CRASH_START_SEC), + FLASH_ISCSI_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_ISCSI_CRASH_NSECS), + + /* + * FCoE persistent/crash information. + */ + FLASH_FCOE_CRASH_START_SEC = 30, + FLASH_FCOE_CRASH_NSECS = 1, + FLASH_FCOE_CRASH_START = FLASH_START(FLASH_FCOE_CRASH_START_SEC), + FLASH_FCOE_CRASH_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FCOE_CRASH_NSECS), + + /* + * Location of Firmware Configuration File in FLASH. Since the FPGA + * "FLASH" is smaller we need to store the Configuration File in a + * different location -- which will overlap the end of the firmware + * image if firmware ever gets that large ... + */ + FLASH_CFG_START_SEC = 31, + FLASH_CFG_NSECS = 1, + FLASH_CFG_START = FLASH_START(FLASH_CFG_START_SEC), + FLASH_CFG_MAX_SIZE = FLASH_MAX_SIZE(FLASH_CFG_NSECS), + + FLASH_FPGA_CFG_START_SEC = 15, + FLASH_FPGA_CFG_START = FLASH_START(FLASH_FPGA_CFG_START_SEC), + + /* + * Sectors 32-63 are reserved for FLASH failover. + */ +}; + +#undef FLASH_START +#undef FLASH_MAX_SIZE + #endif /* __T4_HW_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index 111fc323f15..a1a8b57200f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -86,10 +86,17 @@ #define CIDXINC_SHIFT 0 #define CIDXINC(x) ((x) << CIDXINC_SHIFT) +#define X_RXPKTCPLMODE_SPLIT 1 +#define X_INGPADBOUNDARY_SHIFT 5 + #define SGE_CONTROL 0x1008 #define DCASYSTYPE 0x00080000U -#define RXPKTCPLMODE 0x00040000U -#define EGRSTATUSPAGESIZE 0x00020000U +#define RXPKTCPLMODE_MASK 0x00040000U +#define RXPKTCPLMODE_SHIFT 18 +#define RXPKTCPLMODE(x) ((x) << RXPKTCPLMODE_SHIFT) +#define EGRSTATUSPAGESIZE_MASK 0x00020000U +#define EGRSTATUSPAGESIZE_SHIFT 17 +#define EGRSTATUSPAGESIZE(x) ((x) << EGRSTATUSPAGESIZE_SHIFT) #define PKTSHIFT_MASK 0x00001c00U #define PKTSHIFT_SHIFT 10 #define PKTSHIFT(x) ((x) << PKTSHIFT_SHIFT) @@ -108,6 +115,35 @@ #define GLOBALENABLE 0x00000001U #define SGE_HOST_PAGE_SIZE 0x100c + +#define HOSTPAGESIZEPF7_MASK 0x0000000fU +#define HOSTPAGESIZEPF7_SHIFT 28 +#define HOSTPAGESIZEPF7(x) ((x) << HOSTPAGESIZEPF7_SHIFT) + +#define HOSTPAGESIZEPF6_MASK 0x0000000fU +#define HOSTPAGESIZEPF6_SHIFT 24 +#define HOSTPAGESIZEPF6(x) ((x) << HOSTPAGESIZEPF6_SHIFT) + +#define HOSTPAGESIZEPF5_MASK 0x0000000fU +#define HOSTPAGESIZEPF5_SHIFT 20 +#define HOSTPAGESIZEPF5(x) ((x) << HOSTPAGESIZEPF5_SHIFT) + +#define HOSTPAGESIZEPF4_MASK 0x0000000fU +#define HOSTPAGESIZEPF4_SHIFT 16 +#define HOSTPAGESIZEPF4(x) ((x) << HOSTPAGESIZEPF4_SHIFT) + +#define HOSTPAGESIZEPF3_MASK 0x0000000fU +#define HOSTPAGESIZEPF3_SHIFT 12 +#define HOSTPAGESIZEPF3(x) ((x) << HOSTPAGESIZEPF3_SHIFT) + +#define HOSTPAGESIZEPF2_MASK 0x0000000fU +#define HOSTPAGESIZEPF2_SHIFT 8 +#define HOSTPAGESIZEPF2(x) ((x) << HOSTPAGESIZEPF2_SHIFT) + +#define HOSTPAGESIZEPF1_MASK 0x0000000fU +#define HOSTPAGESIZEPF1_SHIFT 4 +#define HOSTPAGESIZEPF1(x) ((x) << HOSTPAGESIZEPF1_SHIFT) + #define HOSTPAGESIZEPF0_MASK 0x0000000fU #define HOSTPAGESIZEPF0_SHIFT 0 #define HOSTPAGESIZEPF0(x) ((x) << HOSTPAGESIZEPF0_SHIFT) @@ -155,6 +191,8 @@ #define SGE_INT_ENABLE3 0x1040 #define SGE_FL_BUFFER_SIZE0 0x1044 #define SGE_FL_BUFFER_SIZE1 0x1048 +#define SGE_FL_BUFFER_SIZE2 0x104c +#define SGE_FL_BUFFER_SIZE3 0x1050 #define SGE_INGRESS_RX_THRESHOLD 0x10a0 #define THRESHOLD_0_MASK 0x3f000000U #define THRESHOLD_0_SHIFT 24 @@ -173,6 +211,12 @@ #define THRESHOLD_3(x) ((x) << THRESHOLD_3_SHIFT) #define THRESHOLD_3_GET(x) (((x) & THRESHOLD_3_MASK) >> THRESHOLD_3_SHIFT) +#define SGE_CONM_CTRL 0x1094 +#define EGRTHRESHOLD_MASK 0x00003f00U +#define EGRTHRESHOLDshift 8 +#define EGRTHRESHOLD(x) ((x) << EGRTHRESHOLDshift) +#define EGRTHRESHOLD_GET(x) (((x) & EGRTHRESHOLD_MASK) >> EGRTHRESHOLDshift) + #define SGE_TIMER_VALUE_0_AND_1 0x10b8 #define TIMERVALUE0_MASK 0xffff0000U #define TIMERVALUE0_SHIFT 16 @@ -184,64 +228,54 @@ #define TIMERVALUE1_GET(x) (((x) & TIMERVALUE1_MASK) >> TIMERVALUE1_SHIFT) #define SGE_TIMER_VALUE_2_AND_3 0x10bc +#define TIMERVALUE2_MASK 0xffff0000U +#define TIMERVALUE2_SHIFT 16 +#define TIMERVALUE2(x) ((x) << TIMERVALUE2_SHIFT) +#define TIMERVALUE2_GET(x) (((x) & TIMERVALUE2_MASK) >> TIMERVALUE2_SHIFT) +#define TIMERVALUE3_MASK 0x0000ffffU +#define TIMERVALUE3_SHIFT 0 +#define TIMERVALUE3(x) ((x) << TIMERVALUE3_SHIFT) +#define TIMERVALUE3_GET(x) (((x) & TIMERVALUE3_MASK) >> TIMERVALUE3_SHIFT) + #define SGE_TIMER_VALUE_4_AND_5 0x10c0 +#define TIMERVALUE4_MASK 0xffff0000U +#define TIMERVALUE4_SHIFT 16 +#define TIMERVALUE4(x) ((x) << TIMERVALUE4_SHIFT) +#define TIMERVALUE4_GET(x) (((x) & TIMERVALUE4_MASK) >> TIMERVALUE4_SHIFT) +#define TIMERVALUE5_MASK 0x0000ffffU +#define TIMERVALUE5_SHIFT 0 +#define TIMERVALUE5(x) ((x) << TIMERVALUE5_SHIFT) +#define TIMERVALUE5_GET(x) (((x) & TIMERVALUE5_MASK) >> TIMERVALUE5_SHIFT) + #define SGE_DEBUG_INDEX 0x10cc #define SGE_DEBUG_DATA_HIGH 0x10d0 #define SGE_DEBUG_DATA_LOW 0x10d4 #define SGE_INGRESS_QUEUES_PER_PAGE_PF 0x10f4 -#define S_LP_INT_THRESH 12 -#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH) #define S_HP_INT_THRESH 28 +#define M_HP_INT_THRESH 0xfU #define V_HP_INT_THRESH(x) ((x) << S_HP_INT_THRESH) +#define M_HP_COUNT 0x7ffU +#define S_HP_COUNT 16 +#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT) +#define S_LP_INT_THRESH 12 +#define M_LP_INT_THRESH 0xfU +#define V_LP_INT_THRESH(x) ((x) << S_LP_INT_THRESH) +#define M_LP_COUNT 0x7ffU +#define S_LP_COUNT 0 +#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT) #define A_SGE_DBFIFO_STATUS 0x10a4 #define S_ENABLE_DROP 13 #define V_ENABLE_DROP(x) ((x) << S_ENABLE_DROP) #define F_ENABLE_DROP V_ENABLE_DROP(1U) -#define A_SGE_DOORBELL_CONTROL 0x10a8 - -#define A_SGE_CTXT_CMD 0x11fc -#define A_SGE_DBQ_CTXT_BADDR 0x1084 - -#define A_SGE_PF_KDOORBELL 0x0 - -#define S_QID 15 -#define V_QID(x) ((x) << S_QID) - -#define S_PIDX 0 -#define V_PIDX(x) ((x) << S_PIDX) - -#define M_LP_COUNT 0x7ffU -#define S_LP_COUNT 0 -#define G_LP_COUNT(x) (((x) >> S_LP_COUNT) & M_LP_COUNT) - -#define M_HP_COUNT 0x7ffU -#define S_HP_COUNT 16 -#define G_HP_COUNT(x) (((x) >> S_HP_COUNT) & M_HP_COUNT) - -#define A_SGE_INT_ENABLE3 0x1040 - -#define S_DBFIFO_HP_INT 8 -#define V_DBFIFO_HP_INT(x) ((x) << S_DBFIFO_HP_INT) -#define F_DBFIFO_HP_INT V_DBFIFO_HP_INT(1U) - -#define S_DBFIFO_LP_INT 7 -#define V_DBFIFO_LP_INT(x) ((x) << S_DBFIFO_LP_INT) -#define F_DBFIFO_LP_INT V_DBFIFO_LP_INT(1U) - #define S_DROPPED_DB 0 #define V_DROPPED_DB(x) ((x) << S_DROPPED_DB) #define F_DROPPED_DB V_DROPPED_DB(1U) +#define A_SGE_DOORBELL_CONTROL 0x10a8 -#define S_ERR_DROPPED_DB 18 -#define V_ERR_DROPPED_DB(x) ((x) << S_ERR_DROPPED_DB) -#define F_ERR_DROPPED_DB V_ERR_DROPPED_DB(1U) - -#define A_PCIE_MEM_ACCESS_OFFSET 0x306c - -#define M_HP_INT_THRESH 0xfU -#define M_LP_INT_THRESH 0xfU +#define A_SGE_CTXT_CMD 0x11fc +#define A_SGE_DBQ_CTXT_BADDR 0x1084 #define PCIE_PF_CLI 0x44 #define PCIE_INT_CAUSE 0x3004 @@ -287,6 +321,8 @@ #define WINDOW(x) ((x) << WINDOW_SHIFT) #define PCIE_MEM_ACCESS_OFFSET 0x306c +#define PCIE_FW 0x30b8 + #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS 0x5908 #define RNPP 0x80000000U #define RPCP 0x20000000U @@ -364,7 +400,7 @@ #define MEM_WRAP_CLIENT_NUM_MASK 0x0000000fU #define MEM_WRAP_CLIENT_NUM_SHIFT 0 #define MEM_WRAP_CLIENT_NUM_GET(x) (((x) & MEM_WRAP_CLIENT_NUM_MASK) >> MEM_WRAP_CLIENT_NUM_SHIFT) - +#define MA_PCIE_FW 0x30b8 #define MA_PARITY_ERROR_STATUS 0x77f4 #define EDC_0_BASE_ADDR 0x7900 @@ -385,6 +421,7 @@ #define CIM_BOOT_CFG 0x7b00 #define BOOTADDR_MASK 0xffffff00U +#define UPCRST 0x1U #define CIM_PF_MAILBOX_DATA 0x240 #define CIM_PF_MAILBOX_CTRL 0x280 @@ -457,6 +494,13 @@ #define VLANEXTENABLE_MASK 0x0000f000U #define VLANEXTENABLE_SHIFT 12 +#define TP_GLOBAL_CONFIG 0x7d08 +#define FIVETUPLELOOKUP_SHIFT 17 +#define FIVETUPLELOOKUP_MASK 0x00060000U +#define FIVETUPLELOOKUP(x) ((x) << FIVETUPLELOOKUP_SHIFT) +#define FIVETUPLELOOKUP_GET(x) (((x) & FIVETUPLELOOKUP_MASK) >> \ + FIVETUPLELOOKUP_SHIFT) + #define TP_PARA_REG2 0x7d68 #define MAXRXDATA_MASK 0xffff0000U #define MAXRXDATA_SHIFT 16 @@ -466,8 +510,47 @@ #define TIMERRESOLUTION_MASK 0x00ff0000U #define TIMERRESOLUTION_SHIFT 16 #define TIMERRESOLUTION_GET(x) (((x) & TIMERRESOLUTION_MASK) >> TIMERRESOLUTION_SHIFT) +#define DELAYEDACKRESOLUTION_MASK 0x000000ffU +#define DELAYEDACKRESOLUTION_SHIFT 0 +#define DELAYEDACKRESOLUTION_GET(x) \ + (((x) & DELAYEDACKRESOLUTION_MASK) >> DELAYEDACKRESOLUTION_SHIFT) #define TP_SHIFT_CNT 0x7dc0 +#define SYNSHIFTMAX_SHIFT 24 +#define SYNSHIFTMAX_MASK 0xff000000U +#define SYNSHIFTMAX(x) ((x) << SYNSHIFTMAX_SHIFT) +#define SYNSHIFTMAX_GET(x) (((x) & SYNSHIFTMAX_MASK) >> \ + SYNSHIFTMAX_SHIFT) +#define RXTSHIFTMAXR1_SHIFT 20 +#define RXTSHIFTMAXR1_MASK 0x00f00000U +#define RXTSHIFTMAXR1(x) ((x) << RXTSHIFTMAXR1_SHIFT) +#define RXTSHIFTMAXR1_GET(x) (((x) & RXTSHIFTMAXR1_MASK) >> \ + RXTSHIFTMAXR1_SHIFT) +#define RXTSHIFTMAXR2_SHIFT 16 +#define RXTSHIFTMAXR2_MASK 0x000f0000U +#define RXTSHIFTMAXR2(x) ((x) << RXTSHIFTMAXR2_SHIFT) +#define RXTSHIFTMAXR2_GET(x) (((x) & RXTSHIFTMAXR2_MASK) >> \ + RXTSHIFTMAXR2_SHIFT) +#define PERSHIFTBACKOFFMAX_SHIFT 12 +#define PERSHIFTBACKOFFMAX_MASK 0x0000f000U +#define PERSHIFTBACKOFFMAX(x) ((x) << PERSHIFTBACKOFFMAX_SHIFT) +#define PERSHIFTBACKOFFMAX_GET(x) (((x) & PERSHIFTBACKOFFMAX_MASK) >> \ + PERSHIFTBACKOFFMAX_SHIFT) +#define PERSHIFTMAX_SHIFT 8 +#define PERSHIFTMAX_MASK 0x00000f00U +#define PERSHIFTMAX(x) ((x) << PERSHIFTMAX_SHIFT) +#define PERSHIFTMAX_GET(x) (((x) & PERSHIFTMAX_MASK) >> \ + PERSHIFTMAX_SHIFT) +#define KEEPALIVEMAXR1_SHIFT 4 +#define KEEPALIVEMAXR1_MASK 0x000000f0U +#define KEEPALIVEMAXR1(x) ((x) << KEEPALIVEMAXR1_SHIFT) +#define KEEPALIVEMAXR1_GET(x) (((x) & KEEPALIVEMAXR1_MASK) >> \ + KEEPALIVEMAXR1_SHIFT) +#define KEEPALIVEMAXR2_SHIFT 0 +#define KEEPALIVEMAXR2_MASK 0x0000000fU +#define KEEPALIVEMAXR2(x) ((x) << KEEPALIVEMAXR2_SHIFT) +#define KEEPALIVEMAXR2_GET(x) (((x) & KEEPALIVEMAXR2_MASK) >> \ + KEEPALIVEMAXR2_SHIFT) #define TP_CCTRL_TABLE 0x7ddc #define TP_MTU_TABLE 0x7de4 @@ -501,6 +584,20 @@ #define TP_INT_CAUSE 0x7e74 #define FLMTXFLSTEMPTY 0x40000000U +#define TP_VLAN_PRI_MAP 0x140 +#define FRAGMENTATION_SHIFT 9 +#define FRAGMENTATION_MASK 0x00000200U +#define MPSHITTYPE_MASK 0x00000100U +#define MACMATCH_MASK 0x00000080U +#define ETHERTYPE_MASK 0x00000040U +#define PROTOCOL_MASK 0x00000020U +#define TOS_MASK 0x00000010U +#define VLAN_MASK 0x00000008U +#define VNIC_ID_MASK 0x00000004U +#define PORT_MASK 0x00000002U +#define FCOE_SHIFT 0 +#define FCOE_MASK 0x00000001U + #define TP_INGRESS_CONFIG 0x141 #define VNIC 0x00000800U #define CSUM_HAS_PSEUDO_HDR 0x00000400U diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index ad53f796b57..a6364632b49 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -79,6 +79,8 @@ struct fw_wr_hdr { #define FW_WR_FLOWID(x) ((x) << 8) #define FW_WR_LEN16(x) ((x) << 0) +#define HW_TPL_FR_MT_PR_IV_P_FC 0X32B + struct fw_ulptx_wr { __be32 op_to_compl; __be32 flowid_len16; @@ -155,6 +157,17 @@ struct fw_eth_tx_pkt_vm_wr { #define FW_CMD_MAX_TIMEOUT 3000 +/* + * If a host driver does a HELLO and discovers that there's already a MASTER + * selected, we may have to wait for that MASTER to finish issuing RESET, + * configuration and INITIALIZE commands. Also, there's a possibility that + * our own HELLO may get lost if it happens right as the MASTER is issuign a + * RESET command, so we need to be willing to make a few retries of our HELLO. + */ +#define FW_CMD_HELLO_TIMEOUT (3 * FW_CMD_MAX_TIMEOUT) +#define FW_CMD_HELLO_RETRIES 3 + + enum fw_cmd_opcodes { FW_LDST_CMD = 0x01, FW_RESET_CMD = 0x03, @@ -304,7 +317,17 @@ struct fw_reset_cmd { __be32 op_to_write; __be32 retval_len16; __be32 val; - __be32 r3; + __be32 halt_pkd; +}; + +#define FW_RESET_CMD_HALT_SHIFT 31 +#define FW_RESET_CMD_HALT_MASK 0x1 +#define FW_RESET_CMD_HALT(x) ((x) << FW_RESET_CMD_HALT_SHIFT) +#define FW_RESET_CMD_HALT_GET(x) \ + (((x) >> FW_RESET_CMD_HALT_SHIFT) & FW_RESET_CMD_HALT_MASK) + +enum fw_hellow_cmd { + fw_hello_cmd_stage_os = 0x0 }; struct fw_hello_cmd { @@ -315,8 +338,14 @@ struct fw_hello_cmd { #define FW_HELLO_CMD_INIT (1U << 30) #define FW_HELLO_CMD_MASTERDIS(x) ((x) << 29) #define FW_HELLO_CMD_MASTERFORCE(x) ((x) << 28) -#define FW_HELLO_CMD_MBMASTER(x) ((x) << 24) +#define FW_HELLO_CMD_MBMASTER_MASK 0xfU +#define FW_HELLO_CMD_MBMASTER_SHIFT 24 +#define FW_HELLO_CMD_MBMASTER(x) ((x) << FW_HELLO_CMD_MBMASTER_SHIFT) +#define FW_HELLO_CMD_MBMASTER_GET(x) \ + (((x) >> FW_HELLO_CMD_MBMASTER_SHIFT) & FW_HELLO_CMD_MBMASTER_MASK) #define FW_HELLO_CMD_MBASYNCNOT(x) ((x) << 20) +#define FW_HELLO_CMD_STAGE(x) ((x) << 17) +#define FW_HELLO_CMD_CLEARINIT (1U << 16) __be32 fwrev; }; @@ -401,6 +430,14 @@ enum fw_caps_config_fcoe { FW_CAPS_CONFIG_FCOE_TARGET = 0x00000002, }; +enum fw_memtype_cf { + FW_MEMTYPE_CF_EDC0 = 0x0, + FW_MEMTYPE_CF_EDC1 = 0x1, + FW_MEMTYPE_CF_EXTMEM = 0x2, + FW_MEMTYPE_CF_FLASH = 0x4, + FW_MEMTYPE_CF_INTERNAL = 0x5, +}; + struct fw_caps_config_cmd { __be32 op_to_write; __be32 retval_len16; @@ -416,10 +453,15 @@ struct fw_caps_config_cmd { __be16 r4; __be16 iscsicaps; __be16 fcoecaps; - __be32 r5; - __be64 r6; + __be32 cfcsum; + __be32 finiver; + __be32 finicsum; }; +#define FW_CAPS_CONFIG_CMD_CFVALID (1U << 27) +#define FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) ((x) << 24) +#define FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) ((x) << 16) + /* * params command mnemonics */ @@ -451,6 +493,7 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_INTVER_FCOE = 0x0A, FW_PARAMS_PARAM_DEV_FWREV = 0x0B, FW_PARAMS_PARAM_DEV_TPREV = 0x0C, + FW_PARAMS_PARAM_DEV_CF = 0x0D, }; /* @@ -492,6 +535,8 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_IQFLINT_END = 0x2A, FW_PARAMS_PARAM_PFVF_EQ_START = 0x2B, FW_PARAMS_PARAM_PFVF_EQ_END = 0x2C, + FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_START = 0x2D, + FW_PARAMS_PARAM_PFVF_ACTIVE_FILTER_END = 0x2E }; /* @@ -507,8 +552,16 @@ enum fw_params_param_dmaq { #define FW_PARAMS_MNEM(x) ((x) << 24) #define FW_PARAMS_PARAM_X(x) ((x) << 16) -#define FW_PARAMS_PARAM_Y(x) ((x) << 8) -#define FW_PARAMS_PARAM_Z(x) ((x) << 0) +#define FW_PARAMS_PARAM_Y_SHIFT 8 +#define FW_PARAMS_PARAM_Y_MASK 0xffU +#define FW_PARAMS_PARAM_Y(x) ((x) << FW_PARAMS_PARAM_Y_SHIFT) +#define FW_PARAMS_PARAM_Y_GET(x) (((x) >> FW_PARAMS_PARAM_Y_SHIFT) &\ + FW_PARAMS_PARAM_Y_MASK) +#define FW_PARAMS_PARAM_Z_SHIFT 0 +#define FW_PARAMS_PARAM_Z_MASK 0xffu +#define FW_PARAMS_PARAM_Z(x) ((x) << FW_PARAMS_PARAM_Z_SHIFT) +#define FW_PARAMS_PARAM_Z_GET(x) (((x) >> FW_PARAMS_PARAM_Z_SHIFT) &\ + FW_PARAMS_PARAM_Z_MASK) #define FW_PARAMS_PARAM_XYZ(x) ((x) << 0) #define FW_PARAMS_PARAM_YZ(x) ((x) << 0) @@ -1599,6 +1652,16 @@ struct fw_debug_cmd { } u; }; +#define FW_PCIE_FW_ERR (1U << 31) +#define FW_PCIE_FW_INIT (1U << 30) +#define FW_PCIE_FW_HALT (1U << 29) +#define FW_PCIE_FW_MASTER_VLD (1U << 15) +#define FW_PCIE_FW_MASTER_MASK 0x7 +#define FW_PCIE_FW_MASTER_SHIFT 12 +#define FW_PCIE_FW_MASTER(x) ((x) << FW_PCIE_FW_MASTER_SHIFT) +#define FW_PCIE_FW_MASTER_GET(x) (((x) >> FW_PCIE_FW_MASTER_SHIFT) & \ + FW_PCIE_FW_MASTER_MASK) + struct fw_hdr { u8 ver; u8 reserved1; @@ -1613,7 +1676,11 @@ struct fw_hdr { u8 intfver_iscsi; u8 intfver_fcoe; u8 reserved2; - __be32 reserved3[27]; + __u32 reserved3; + __u32 reserved4; + __u32 reserved5; + __be32 flags; + __be32 reserved6[23]; }; #define FW_HDR_FW_VER_MAJOR_GET(x) (((x) >> 24) & 0xff) @@ -1621,18 +1688,8 @@ struct fw_hdr { #define FW_HDR_FW_VER_MICRO_GET(x) (((x) >> 8) & 0xff) #define FW_HDR_FW_VER_BUILD_GET(x) (((x) >> 0) & 0xff) -#define S_FW_CMD_OP 24 -#define V_FW_CMD_OP(x) ((x) << S_FW_CMD_OP) - -#define S_FW_CMD_REQUEST 23 -#define V_FW_CMD_REQUEST(x) ((x) << S_FW_CMD_REQUEST) -#define F_FW_CMD_REQUEST V_FW_CMD_REQUEST(1U) - -#define S_FW_CMD_WRITE 21 -#define V_FW_CMD_WRITE(x) ((x) << S_FW_CMD_WRITE) -#define F_FW_CMD_WRITE V_FW_CMD_WRITE(1U) - -#define S_FW_LDST_CMD_ADDRSPACE 0 -#define V_FW_LDST_CMD_ADDRSPACE(x) ((x) << S_FW_LDST_CMD_ADDRSPACE) +enum fw_hdr_flags { + FW_HDR_FLAGS_RESET_HALT = 0x00000001, +}; #endif /* _T4FW_INTERFACE_H_ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 8877fbfefb6..f16745f4b36 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -2421,7 +2421,7 @@ int t4vf_sge_init(struct adapter *adapter) fl0, fl1); return -EINVAL; } - if ((sge_params->sge_control & RXPKTCPLMODE) == 0) { + if ((sge_params->sge_control & RXPKTCPLMODE_MASK) == 0) { dev_err(adapter->pdev_dev, "bad SGE CPL MODE\n"); return -EINVAL; } @@ -2431,7 +2431,8 @@ int t4vf_sge_init(struct adapter *adapter) */ if (fl1) FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT; - STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64); + STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE_MASK) + ? 128 : 64); PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control); FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) + SGE_INGPADBOUNDARY_SHIFT); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index d266c86a53f..cf4c05bdf5f 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -110,6 +110,7 @@ static inline char *nic_name(struct pci_dev *pdev) #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 struct be_dma_mem { @@ -336,7 +337,6 @@ struct phy_info { u16 auto_speeds_supported; u16 fixed_speeds_supported; int link_speed; - int forced_port_speed; u32 dac_cable_len; u32 advertising; u32 supported; diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 8c63d06ab12..af60bb26e33 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -120,7 +120,7 @@ static int be_mcc_compl_process(struct be_adapter *adapter, if (compl_status == MCC_STATUS_UNAUTHORIZED_REQUEST) { dev_warn(&adapter->pdev->dev, - "opcode %d-%d is not permitted\n", + "VF is not privileged to issue opcode %d-%d\n", opcode, subsystem); } else { extd_status = (compl->status >> CQE_STATUS_EXTD_SHIFT) & @@ -165,14 +165,13 @@ static void be_async_grp5_cos_priority_process(struct be_adapter *adapter, } } -/* Grp5 QOS Speed evt */ +/* Grp5 QOS Speed evt: qos_link_speed is in units of 10 Mbps */ static void be_async_grp5_qos_speed_process(struct be_adapter *adapter, struct be_async_event_grp5_qos_link_speed *evt) { - if (evt->physical_port == adapter->port_num) { - /* qos_link_speed is in units of 10 Mbps */ - adapter->phy.link_speed = evt->qos_link_speed * 10; - } + if (adapter->phy.link_speed >= 0 && + evt->physical_port == adapter->port_num) + adapter->phy.link_speed = le16_to_cpu(evt->qos_link_speed) * 10; } /*Grp5 PVID evt*/ @@ -717,7 +716,7 @@ int be_cmd_eq_create(struct be_adapter *adapter, /* Use MCC */ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, - u8 type, bool permanent, u32 if_handle, u32 pmac_id) + bool permanent, u32 if_handle, u32 pmac_id) { struct be_mcc_wrb *wrb; struct be_cmd_req_mac_query *req; @@ -734,7 +733,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, OPCODE_COMMON_NTWK_MAC_QUERY, sizeof(*req), wrb, NULL); - req->type = type; + req->type = MAC_ADDRESS_TYPE_NETWORK; if (permanent) { req->permanent = 1; } else { @@ -1326,9 +1325,28 @@ err: return status; } -/* Uses synchronous mcc */ -int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed, - u16 *link_speed, u8 *link_status, u32 dom) +static int be_mac_to_link_speed(int mac_speed) +{ + switch (mac_speed) { + case PHY_LINK_SPEED_ZERO: + return 0; + case PHY_LINK_SPEED_10MBPS: + return 10; + case PHY_LINK_SPEED_100MBPS: + return 100; + case PHY_LINK_SPEED_1GBPS: + return 1000; + case PHY_LINK_SPEED_10GBPS: + return 10000; + } + return 0; +} + +/* Uses synchronous mcc + * Returns link_speed in Mbps + */ +int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, + u8 *link_status, u32 dom) { struct be_mcc_wrb *wrb; struct be_cmd_req_link_status *req; @@ -1357,11 +1375,13 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed, status = be_mcc_notify_wait(adapter); if (!status) { struct be_cmd_resp_link_status *resp = embedded_payload(wrb); - if (resp->mac_speed != PHY_LINK_SPEED_ZERO) { - if (link_speed) - *link_speed = le16_to_cpu(resp->link_speed); - if (mac_speed) - *mac_speed = resp->mac_speed; + if (link_speed) { + *link_speed = resp->link_speed ? + le16_to_cpu(resp->link_speed) * 10 : + be_mac_to_link_speed(resp->mac_speed); + + if (!resp->logical_link_status) + *link_speed = 0; } if (link_status) *link_status = resp->logical_link_status; @@ -2405,6 +2425,9 @@ int be_cmd_req_native_mode(struct be_adapter *adapter) struct be_cmd_resp_set_func_cap *resp = embedded_payload(wrb); adapter->be3_native = le32_to_cpu(resp->cap_flags) & CAPABILITY_BE3_NATIVE_ERX_API; + if (!adapter->be3_native) + dev_warn(&adapter->pdev->dev, + "adapter not in advanced mode\n"); } err: mutex_unlock(&adapter->mbox_lock); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 250f19b5f7b..0936e21e3cf 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -1687,7 +1687,7 @@ struct be_cmd_req_set_ext_fat_caps { extern int be_pci_fnum_get(struct be_adapter *adapter); extern int be_fw_wait_ready(struct be_adapter *adapter); extern int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, - u8 type, bool permanent, u32 if_handle, u32 pmac_id); + bool permanent, u32 if_handle, u32 pmac_id); extern int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, u32 if_id, u32 *pmac_id, u32 domain); extern int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, @@ -1714,8 +1714,8 @@ extern int be_cmd_q_destroy(struct be_adapter *adapter, struct be_queue_info *q, int type); extern int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q); -extern int be_cmd_link_status_query(struct be_adapter *adapter, u8 *mac_speed, - u16 *link_speed, u8 *link_status, u32 dom); +extern int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, + u8 *link_status, u32 dom); extern int be_cmd_reset(struct be_adapter *adapter); extern int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index c0e700653f9..8e6fb0ba6aa 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -512,28 +512,6 @@ static u32 convert_to_et_setting(u32 if_type, u32 if_speeds) return val; } -static int convert_to_et_speed(u32 be_speed) -{ - int et_speed = SPEED_10000; - - switch (be_speed) { - case PHY_LINK_SPEED_10MBPS: - et_speed = SPEED_10; - break; - case PHY_LINK_SPEED_100MBPS: - et_speed = SPEED_100; - break; - case PHY_LINK_SPEED_1GBPS: - et_speed = SPEED_1000; - break; - case PHY_LINK_SPEED_10GBPS: - et_speed = SPEED_10000; - break; - } - - return et_speed; -} - bool be_pause_supported(struct be_adapter *adapter) { return (adapter->phy.interface_type == PHY_TYPE_SFP_PLUS_10GB || @@ -544,27 +522,16 @@ bool be_pause_supported(struct be_adapter *adapter) static int be_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) { struct be_adapter *adapter = netdev_priv(netdev); - u8 port_speed = 0; - u16 link_speed = 0; u8 link_status; - u32 et_speed = 0; + u16 link_speed = 0; int status; - if (adapter->phy.link_speed < 0 || !(netdev->flags & IFF_UP)) { - if (adapter->phy.forced_port_speed < 0) { - status = be_cmd_link_status_query(adapter, &port_speed, - &link_speed, &link_status, 0); - if (!status) - be_link_status_update(adapter, link_status); - if (link_speed) - et_speed = link_speed * 10; - else if (link_status) - et_speed = convert_to_et_speed(port_speed); - } else { - et_speed = adapter->phy.forced_port_speed; - } - - ethtool_cmd_speed_set(ecmd, et_speed); + if (adapter->phy.link_speed < 0) { + status = be_cmd_link_status_query(adapter, &link_speed, + &link_status, 0); + if (!status) + be_link_status_update(adapter, link_status); + ethtool_cmd_speed_set(ecmd, link_speed); status = be_cmd_get_phy_info(adapter); if (status) @@ -773,8 +740,8 @@ static void be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) { struct be_adapter *adapter = netdev_priv(netdev); - u8 mac_speed = 0; - u16 qos_link_speed = 0; + int status; + u8 link_status = 0; memset(data, 0, sizeof(u64) * ETHTOOL_TESTS_NUM); @@ -798,11 +765,11 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data) test->flags |= ETH_TEST_FL_FAILED; } - if (be_cmd_link_status_query(adapter, &mac_speed, - &qos_link_speed, NULL, 0) != 0) { + status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); + if (status) { test->flags |= ETH_TEST_FL_FAILED; data[4] = -1; - } else if (!mac_speed) { + } else if (!link_status) { test->flags |= ETH_TEST_FL_FAILED; data[4] = 1; } diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 78b8aa8069f..eb3f2cb3b93 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -20,6 +20,7 @@ #include "be.h" #include "be_cmds.h" #include <asm/div64.h> +#include <linux/aer.h> MODULE_VERSION(DRV_VER); MODULE_DEVICE_TABLE(pci, be_dev_ids); @@ -240,9 +241,8 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - status = be_cmd_mac_addr_query(adapter, current_mac, - MAC_ADDRESS_TYPE_NETWORK, false, - adapter->if_handle, 0); + status = be_cmd_mac_addr_query(adapter, current_mac, false, + adapter->if_handle, 0); if (status) goto err; @@ -1075,7 +1075,7 @@ static int be_set_vf_tx_rate(struct net_device *netdev, static int be_find_vfs(struct be_adapter *adapter, int vf_state) { struct pci_dev *dev, *pdev = adapter->pdev; - int vfs = 0, assigned_vfs = 0, pos, vf_fn; + int vfs = 0, assigned_vfs = 0, pos; u16 offset, stride; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); @@ -1086,9 +1086,7 @@ static int be_find_vfs(struct be_adapter *adapter, int vf_state) dev = pci_get_device(pdev->vendor, PCI_ANY_ID, NULL); while (dev) { - vf_fn = (pdev->devfn + offset + stride * vfs) & 0xFFFF; - if (dev->is_virtfn && dev->devfn == vf_fn && - dev->bus->number == pdev->bus->number) { + if (dev->is_virtfn && pci_physfn(dev) == pdev) { vfs++; if (dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) assigned_vfs++; @@ -1896,6 +1894,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) return status; } + dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", + adapter->num_tx_qs); return 0; } @@ -1946,10 +1946,9 @@ static int be_rx_cqs_create(struct be_adapter *adapter) return rc; } - if (adapter->num_rx_qs != MAX_RX_QS) - dev_info(&adapter->pdev->dev, - "Created only %d receive queues\n", adapter->num_rx_qs); - + dev_info(&adapter->pdev->dev, + "created %d RSS queue(s) and 1 default RX queue\n", + adapter->num_rx_qs - 1); return 0; } @@ -2176,8 +2175,7 @@ static uint be_num_rss_want(struct be_adapter *adapter) { u32 num = 0; if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && - !sriov_want(adapter) && be_physfn(adapter) && - !be_is_mc(adapter)) { + !sriov_want(adapter) && be_physfn(adapter)) { num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; num = min_t(u32, num, (u32)netif_get_num_default_rss_queues()); } @@ -2188,6 +2186,7 @@ static void be_msix_enable(struct be_adapter *adapter) { #define BE_MIN_MSIX_VECTORS 1 int i, status, num_vec, num_roce_vec = 0; + struct device *dev = &adapter->pdev->dev; /* If RSS queues are not used, need a vec for default RX Q */ num_vec = min(be_num_rss_want(adapter), num_online_cpus()); @@ -2212,6 +2211,8 @@ static void be_msix_enable(struct be_adapter *adapter) num_vec) == 0) goto done; } + + dev_warn(dev, "MSIx enable failed\n"); return; done: if (be_roce_supported(adapter)) { @@ -2225,6 +2226,7 @@ done: } } else adapter->num_msix_vec = num_vec; + dev_info(dev, "enabled %d MSI-x vector(s)\n", adapter->num_msix_vec); return; } @@ -2441,8 +2443,7 @@ static int be_open(struct net_device *netdev) be_eq_notify(adapter, eqo->q.id, true, false, 0); } - status = be_cmd_link_status_query(adapter, NULL, NULL, - &link_status, 0); + status = be_cmd_link_status_query(adapter, NULL, &link_status, 0); if (!status) be_link_status_update(adapter, link_status); @@ -2646,8 +2647,8 @@ static int be_vf_setup(struct be_adapter *adapter) } for_all_vfs(adapter, vf_cfg, vf) { - status = be_cmd_link_status_query(adapter, NULL, &lnk_speed, - NULL, vf + 1); + lnk_speed = 1000; + status = be_cmd_set_qos(adapter, lnk_speed, vf + 1); if (status) goto err; vf_cfg->tx_rate = lnk_speed * 10; @@ -2671,7 +2672,6 @@ static void be_setup_init(struct be_adapter *adapter) adapter->be3_native = false; adapter->promiscuous = false; adapter->eq_next_idx = 0; - adapter->phy.forced_port_speed = -1; } static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle, @@ -2693,21 +2693,16 @@ static int be_get_mac_addr(struct be_adapter *adapter, u8 *mac, u32 if_handle, status = be_cmd_get_mac_from_list(adapter, mac, active_mac, pmac_id, 0); if (*active_mac) { - status = be_cmd_mac_addr_query(adapter, mac, - MAC_ADDRESS_TYPE_NETWORK, - false, if_handle, - *pmac_id); + status = be_cmd_mac_addr_query(adapter, mac, false, + if_handle, *pmac_id); } } else if (be_physfn(adapter)) { /* For BE3, for PF get permanent MAC */ - status = be_cmd_mac_addr_query(adapter, mac, - MAC_ADDRESS_TYPE_NETWORK, true, - 0, 0); + status = be_cmd_mac_addr_query(adapter, mac, true, 0, 0); *active_mac = false; } else { /* For BE3, for VF get soft MAC assigned by PF*/ - status = be_cmd_mac_addr_query(adapter, mac, - MAC_ADDRESS_TYPE_NETWORK, false, + status = be_cmd_mac_addr_query(adapter, mac, false, if_handle, 0); *active_mac = true; } @@ -2724,6 +2719,8 @@ static int be_get_config(struct be_adapter *adapter) if (pos) { pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF, &dev_num_vfs); + if (!lancer_chip(adapter)) + dev_num_vfs = min_t(u16, dev_num_vfs, MAX_VFS); adapter->dev_num_vfs = dev_num_vfs; } return 0; @@ -3437,6 +3434,7 @@ static void be_ctrl_cleanup(struct be_adapter *adapter) if (mem->va) dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, mem->dma); + kfree(adapter->pmac_id); } static int be_ctrl_init(struct be_adapter *adapter) @@ -3473,6 +3471,12 @@ static int be_ctrl_init(struct be_adapter *adapter) } memset(rx_filter->va, 0, rx_filter->size); + /* primary mac needs 1 pmac entry */ + adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, + sizeof(*adapter->pmac_id), GFP_KERNEL); + if (!adapter->pmac_id) + return -ENOMEM; + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); @@ -3543,6 +3547,8 @@ static void __devexit be_remove(struct pci_dev *pdev) be_ctrl_cleanup(adapter); + pci_disable_pcie_error_reporting(pdev); + pci_set_drvdata(pdev, NULL); pci_release_regions(pdev); pci_disable_device(pdev); @@ -3609,12 +3615,6 @@ static int be_get_initial_config(struct be_adapter *adapter) else adapter->max_pmac_cnt = BE_VF_UC_PMAC_COUNT; - /* primary mac needs 1 pmac entry */ - adapter->pmac_id = kcalloc(adapter->max_pmac_cnt + 1, - sizeof(u32), GFP_KERNEL); - if (!adapter->pmac_id) - return -ENOMEM; - status = be_cmd_get_cntl_attributes(adapter); if (status) return status; @@ -3800,6 +3800,23 @@ static bool be_reset_required(struct be_adapter *adapter) return be_find_vfs(adapter, ENABLED) > 0 ? false : true; } +static char *mc_name(struct be_adapter *adapter) +{ + if (adapter->function_mode & FLEX10_MODE) + return "FLEX10"; + else if (adapter->function_mode & VNIC_MODE) + return "vNIC"; + else if (adapter->function_mode & UMC_ENABLED) + return "UMC"; + else + return ""; +} + +static inline char *func_name(struct be_adapter *adapter) +{ + return be_physfn(adapter) ? "PF" : "VF"; +} + static int __devinit be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) { @@ -3844,6 +3861,10 @@ static int __devinit be_probe(struct pci_dev *pdev, } } + status = pci_enable_pcie_error_reporting(pdev); + if (status) + dev_err(&pdev->dev, "Could not use PCIe error reporting\n"); + status = be_ctrl_init(adapter); if (status) goto free_netdev; @@ -3886,7 +3907,7 @@ static int __devinit be_probe(struct pci_dev *pdev, status = be_setup(adapter); if (status) - goto msix_disable; + goto stats_clean; be_netdev_init(netdev); status = register_netdev(netdev); @@ -3900,15 +3921,13 @@ static int __devinit be_probe(struct pci_dev *pdev, be_cmd_query_port_name(adapter, &port_name); - dev_info(&pdev->dev, "%s: %s port %c\n", netdev->name, nic_name(pdev), - port_name); + dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev), + func_name(adapter), mc_name(adapter), port_name); return 0; unsetup: be_clear(adapter); -msix_disable: - be_msix_disable(adapter); stats_clean: be_stats_cleanup(adapter); ctrl_clean: @@ -4066,6 +4085,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) if (status) return PCI_ERS_RESULT_DISCONNECT; + pci_cleanup_aer_uncorrect_error_status(pdev); return PCI_ERS_RESULT_RECOVERED; } @@ -4106,7 +4126,7 @@ err: dev_err(&adapter->pdev->dev, "EEH resume failed\n"); } -static struct pci_error_handlers be_eeh_handlers = { +static const struct pci_error_handlers be_eeh_handlers = { .error_detected = be_eeh_err_detected, .slot_reset = be_eeh_reset, .resume = be_eeh_resume, diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 3574e1499df..feff51664dc 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -62,6 +62,13 @@ config FSL_PQ_MDIO ---help--- This driver supports the MDIO bus used by the gianfar and UCC drivers. +config FSL_XGMAC_MDIO + tristate "Freescale XGMAC MDIO" + depends on FSL_SOC + select PHYLIB + ---help--- + This driver supports the MDIO bus on the Fman 10G Ethernet MACs. + config UCC_GETH tristate "Freescale QE Gigabit Ethernet" depends on QUICC_ENGINE diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile index 1752488c9ee..3d1839afff6 100644 --- a/drivers/net/ethernet/freescale/Makefile +++ b/drivers/net/ethernet/freescale/Makefile @@ -9,6 +9,7 @@ ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y) endif obj-$(CONFIG_FS_ENET) += fs_enet/ obj-$(CONFIG_FSL_PQ_MDIO) += fsl_pq_mdio.o +obj-$(CONFIG_FSL_XGMAC_MDIO) += xgmac_mdio.o obj-$(CONFIG_GIANFAR) += gianfar_driver.o obj-$(CONFIG_PTP_1588_CLOCK_GIANFAR) += gianfar_ptp.o gianfar_driver-objs := gianfar.o \ diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 9527b28d70d..c93a05654b4 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -19,54 +19,90 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> -#include <linux/unistd.h> #include <linux/slab.h> -#include <linux/interrupt.h> #include <linux/init.h> #include <linux/delay.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/mm.h> #include <linux/module.h> -#include <linux/platform_device.h> -#include <linux/crc32.h> #include <linux/mii.h> -#include <linux/phy.h> -#include <linux/of.h> #include <linux/of_address.h> #include <linux/of_mdio.h> -#include <linux/of_platform.h> +#include <linux/of_device.h> #include <asm/io.h> -#include <asm/irq.h> -#include <asm/uaccess.h> -#include <asm/ucc.h> +#include <asm/ucc.h> /* for ucc_set_qe_mux_mii_mng() */ #include "gianfar.h" -#include "fsl_pq_mdio.h" + +#define MIIMIND_BUSY 0x00000001 +#define MIIMIND_NOTVALID 0x00000004 +#define MIIMCFG_INIT_VALUE 0x00000007 +#define MIIMCFG_RESET 0x80000000 + +#define MII_READ_COMMAND 0x00000001 + +struct fsl_pq_mii { + u32 miimcfg; /* MII management configuration reg */ + u32 miimcom; /* MII management command reg */ + u32 miimadd; /* MII management address reg */ + u32 miimcon; /* MII management control reg */ + u32 miimstat; /* MII management status reg */ + u32 miimind; /* MII management indication reg */ +}; + +struct fsl_pq_mdio { + u8 res1[16]; + u32 ieventm; /* MDIO Interrupt event register (for etsec2)*/ + u32 imaskm; /* MDIO Interrupt mask register (for etsec2)*/ + u8 res2[4]; + u32 emapm; /* MDIO Event mapping register (for etsec2)*/ + u8 res3[1280]; + struct fsl_pq_mii mii; + u8 res4[28]; + u32 utbipar; /* TBI phy address reg (only on UCC) */ + u8 res5[2728]; +} __packed; /* Number of microseconds to wait for an MII register to respond */ #define MII_TIMEOUT 1000 struct fsl_pq_mdio_priv { void __iomem *map; - struct fsl_pq_mdio __iomem *regs; + struct fsl_pq_mii __iomem *regs; + int irqs[PHY_MAX_ADDR]; +}; + +/* + * Per-device-type data. Each type of device tree node that we support gets + * one of these. + * + * @mii_offset: the offset of the MII registers within the memory map of the + * node. Some nodes define only the MII registers, and some define the whole + * MAC (which includes the MII registers). + * + * @get_tbipa: determines the address of the TBIPA register + * + * @ucc_configure: a special function for extra QE configuration + */ +struct fsl_pq_mdio_data { + unsigned int mii_offset; /* offset of the MII registers */ + uint32_t __iomem * (*get_tbipa)(void __iomem *p); + void (*ucc_configure)(phys_addr_t start, phys_addr_t end); }; /* - * Write value to the PHY at mii_id at register regnum, - * on the bus attached to the local interface, which may be different from the - * generic mdio bus (tied to a single interface), waiting until the write is - * done before returning. This is helpful in programming interfaces like - * the TBI which control interfaces like onchip SERDES and are always tied to - * the local mdio pins, which may not be the same as system mdio bus, used for + * Write value to the PHY at mii_id at register regnum, on the bus attached + * to the local interface, which may be different from the generic mdio bus + * (tied to a single interface), waiting until the write is done before + * returning. This is helpful in programming interfaces like the TBI which + * control interfaces like onchip SERDES and are always tied to the local + * mdio pins, which may not be the same as system mdio bus, used for * controlling the external PHYs, for example. */ -int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, - int regnum, u16 value) +static int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, + u16 value) { + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; /* Set the PHY address and the register address we want to write */ @@ -83,20 +119,21 @@ int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, } /* - * Read the bus for PHY at addr mii_id, register regnum, and - * return the value. Clears miimcom first. All PHY operation - * done on the bus attached to the local interface, - * which may be different from the generic mdio bus - * This is helpful in programming interfaces like - * the TBI which, in turn, control interfaces like onchip SERDES - * and are always tied to the local mdio pins, which may not be the + * Read the bus for PHY at addr mii_id, register regnum, and return the value. + * Clears miimcom first. + * + * All PHY operation done on the bus attached to the local interface, which + * may be different from the generic mdio bus. This is helpful in programming + * interfaces like the TBI which, in turn, control interfaces like on-chip + * SERDES and are always tied to the local mdio pins, which may not be the * same as system mdio bus, used for controlling the external PHYs, for eg. */ -int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, - int mii_id, int regnum) +static int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum) { - u16 value; + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; + u16 value; /* Set the PHY address and the register address we want to read */ out_be32(®s->miimadd, (mii_id << 8) | regnum); @@ -115,44 +152,15 @@ int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, /* Grab the value of the register from miimstat */ value = in_be32(®s->miimstat); + dev_dbg(&bus->dev, "read %04x from address %x/%x\n", value, mii_id, regnum); return value; } -static struct fsl_pq_mdio __iomem *fsl_pq_mdio_get_regs(struct mii_bus *bus) -{ - struct fsl_pq_mdio_priv *priv = bus->priv; - - return priv->regs; -} - -/* - * Write value to the PHY at mii_id at register regnum, - * on the bus, waiting until the write is done before returning. - */ -int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value) -{ - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); - - /* Write to the local MII regs */ - return fsl_pq_local_mdio_write(regs, mii_id, regnum, value); -} - -/* - * Read the bus for PHY at addr mii_id, register regnum, and - * return the value. Clears miimcom first. - */ -int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum) -{ - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); - - /* Read the local MII regs */ - return fsl_pq_local_mdio_read(regs, mii_id, regnum); -} - /* Reset the MIIM registers, and wait for the bus to free */ static int fsl_pq_mdio_reset(struct mii_bus *bus) { - struct fsl_pq_mdio __iomem *regs = fsl_pq_mdio_get_regs(bus); + struct fsl_pq_mdio_priv *priv = bus->priv; + struct fsl_pq_mii __iomem *regs = priv->regs; u32 status; mutex_lock(&bus->mdio_lock); @@ -170,234 +178,291 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) mutex_unlock(&bus->mdio_lock); if (!status) { - printk(KERN_ERR "%s: The MII Bus is stuck!\n", - bus->name); + dev_err(&bus->dev, "timeout waiting for MII bus\n"); return -EBUSY; } return 0; } -void fsl_pq_mdio_bus_name(char *name, struct device_node *np) +#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +/* + * This is mildly evil, but so is our hardware for doing this. + * Also, we have to cast back to struct gfar because of + * definition weirdness done in gianfar.h. + */ +static uint32_t __iomem *get_gfar_tbipa(void __iomem *p) { - const u32 *addr; - u64 taddr = OF_BAD_ADDR; - - addr = of_get_address(np, 0, NULL, NULL); - if (addr) - taddr = of_translate_address(np, addr); + struct gfar __iomem *enet_regs = p; - snprintf(name, MII_BUS_ID_SIZE, "%s@%llx", np->name, - (unsigned long long)taddr); + return &enet_regs->tbipa; } -EXPORT_SYMBOL_GPL(fsl_pq_mdio_bus_name); +/* + * Return the TBIPAR address for an eTSEC2 node + */ +static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) +{ + return p; +} +#endif -static u32 __iomem *get_gfar_tbipa(struct fsl_pq_mdio __iomem *regs, struct device_node *np) +#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +/* + * Return the TBIPAR address for a QE MDIO node + */ +static uint32_t __iomem *get_ucc_tbipa(void __iomem *p) { -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) - struct gfar __iomem *enet_regs; + struct fsl_pq_mdio __iomem *mdio = p; - /* - * This is mildly evil, but so is our hardware for doing this. - * Also, we have to cast back to struct gfar because of - * definition weirdness done in gianfar.h. - */ - if(of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "gianfar")) { - enet_regs = (struct gfar __iomem *)regs; - return &enet_regs->tbipa; - } else if (of_device_is_compatible(np, "fsl,etsec2-mdio") || - of_device_is_compatible(np, "fsl,etsec2-tbi")) { - return of_iomap(np, 1); - } -#endif - return NULL; + return &mdio->utbipar; } - -static int get_ucc_id_for_range(u64 start, u64 end, u32 *ucc_id) +/* + * Find the UCC node that controls the given MDIO node + * + * For some reason, the QE MDIO nodes are not children of the UCC devices + * that control them. Therefore, we need to scan all UCC nodes looking for + * the one that encompases the given MDIO node. We do this by comparing + * physical addresses. The 'start' and 'end' addresses of the MDIO node are + * passed, and the correct UCC node will cover the entire address range. + * + * This assumes that there is only one QE MDIO node in the entire device tree. + */ +static void ucc_configure(phys_addr_t start, phys_addr_t end) { -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) + static bool found_mii_master; struct device_node *np = NULL; - int err = 0; - for_each_compatible_node(np, NULL, "ucc_geth") { - struct resource tempres; + if (found_mii_master) + return; - err = of_address_to_resource(np, 0, &tempres); - if (err) + for_each_compatible_node(np, NULL, "ucc_geth") { + struct resource res; + const uint32_t *iprop; + uint32_t id; + int ret; + + ret = of_address_to_resource(np, 0, &res); + if (ret < 0) { + pr_debug("fsl-pq-mdio: no address range in node %s\n", + np->full_name); continue; + } /* if our mdio regs fall within this UCC regs range */ - if ((start >= tempres.start) && (end <= tempres.end)) { - /* Find the id of the UCC */ - const u32 *id; - - id = of_get_property(np, "cell-index", NULL); - if (!id) { - id = of_get_property(np, "device-id", NULL); - if (!id) - continue; + if ((start < res.start) || (end > res.end)) + continue; + + iprop = of_get_property(np, "cell-index", NULL); + if (!iprop) { + iprop = of_get_property(np, "device-id", NULL); + if (!iprop) { + pr_debug("fsl-pq-mdio: no UCC ID in node %s\n", + np->full_name); + continue; } + } - *ucc_id = *id; + id = be32_to_cpup(iprop); - return 0; + /* + * cell-index and device-id for QE nodes are + * numbered from 1, not 0. + */ + if (ucc_set_qe_mux_mii_mng(id - 1) < 0) { + pr_debug("fsl-pq-mdio: invalid UCC ID in node %s\n", + np->full_name); + continue; } + + pr_debug("fsl-pq-mdio: setting node UCC%u to MII master\n", id); + found_mii_master = true; } +} - if (err) - return err; - else - return -EINVAL; -#else - return -ENODEV; #endif -} -static int fsl_pq_mdio_probe(struct platform_device *ofdev) +static struct of_device_id fsl_pq_mdio_match[] = { +#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) + { + .compatible = "fsl,gianfar-tbi", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .compatible = "fsl,gianfar-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .type = "mdio", + .compatible = "gianfar", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_gfar_tbipa, + }, + }, + { + .compatible = "fsl,etsec2-tbi", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_etsec_tbipa, + }, + }, + { + .compatible = "fsl,etsec2-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = offsetof(struct fsl_pq_mdio, mii), + .get_tbipa = get_etsec_tbipa, + }, + }, +#endif +#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) + { + .compatible = "fsl,ucc-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_ucc_tbipa, + .ucc_configure = ucc_configure, + }, + }, + { + /* Legacy UCC MDIO node */ + .type = "mdio", + .compatible = "ucc_geth_phy", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + .get_tbipa = get_ucc_tbipa, + .ucc_configure = ucc_configure, + }, + }, +#endif + /* No Kconfig option for Fman support yet */ + { + .compatible = "fsl,fman-mdio", + .data = &(struct fsl_pq_mdio_data) { + .mii_offset = 0, + /* Fman TBI operations are handled elsewhere */ + }, + }, + + {}, +}; +MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match); + +static int fsl_pq_mdio_probe(struct platform_device *pdev) { - struct device_node *np = ofdev->dev.of_node; + const struct of_device_id *id = + of_match_device(fsl_pq_mdio_match, &pdev->dev); + const struct fsl_pq_mdio_data *data = id->data; + struct device_node *np = pdev->dev.of_node; + struct resource res; struct device_node *tbi; struct fsl_pq_mdio_priv *priv; - struct fsl_pq_mdio __iomem *regs = NULL; - void __iomem *map; - u32 __iomem *tbipa; struct mii_bus *new_bus; - int tbiaddr = -1; - const u32 *addrp; - u64 addr = 0, size = 0; int err; - priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + dev_dbg(&pdev->dev, "found %s compatible node\n", id->compatible); - new_bus = mdiobus_alloc(); - if (!new_bus) { - err = -ENOMEM; - goto err_free_priv; - } + new_bus = mdiobus_alloc_size(sizeof(*priv)); + if (!new_bus) + return -ENOMEM; + priv = new_bus->priv; new_bus->name = "Freescale PowerQUICC MII Bus", - new_bus->read = &fsl_pq_mdio_read, - new_bus->write = &fsl_pq_mdio_write, - new_bus->reset = &fsl_pq_mdio_reset, - new_bus->priv = priv; - fsl_pq_mdio_bus_name(new_bus->id, np); - - addrp = of_get_address(np, 0, &size, NULL); - if (!addrp) { - err = -EINVAL; - goto err_free_bus; + new_bus->read = &fsl_pq_mdio_read; + new_bus->write = &fsl_pq_mdio_write; + new_bus->reset = &fsl_pq_mdio_reset; + new_bus->irq = priv->irqs; + + err = of_address_to_resource(np, 0, &res); + if (err < 0) { + dev_err(&pdev->dev, "could not obtain address information\n"); + goto error; } - /* Set the PHY base address */ - addr = of_translate_address(np, addrp); - if (addr == OF_BAD_ADDR) { - err = -EINVAL; - goto err_free_bus; - } + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s@%llx", np->name, + (unsigned long long)res.start); - map = ioremap(addr, size); - if (!map) { + priv->map = of_iomap(np, 0); + if (!priv->map) { err = -ENOMEM; - goto err_free_bus; + goto error; } - priv->map = map; - - if (of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "fsl,ucc-mdio") || - of_device_is_compatible(np, "ucc_geth_phy")) - map -= offsetof(struct fsl_pq_mdio, miimcfg); - regs = map; - priv->regs = regs; - - new_bus->irq = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); - if (NULL == new_bus->irq) { - err = -ENOMEM; - goto err_unmap_regs; + /* + * Some device tree nodes represent only the MII registers, and + * others represent the MAC and MII registers. The 'mii_offset' field + * contains the offset of the MII registers inside the mapped register + * space. + */ + if (data->mii_offset > resource_size(&res)) { + dev_err(&pdev->dev, "invalid register map\n"); + err = -EINVAL; + goto error; } + priv->regs = priv->map + data->mii_offset; - new_bus->parent = &ofdev->dev; - dev_set_drvdata(&ofdev->dev, new_bus); - - if (of_device_is_compatible(np, "fsl,gianfar-mdio") || - of_device_is_compatible(np, "fsl,gianfar-tbi") || - of_device_is_compatible(np, "fsl,etsec2-mdio") || - of_device_is_compatible(np, "fsl,etsec2-tbi") || - of_device_is_compatible(np, "gianfar")) { - tbipa = get_gfar_tbipa(regs, np); - if (!tbipa) { - err = -EINVAL; - goto err_free_irqs; - } - } else if (of_device_is_compatible(np, "fsl,ucc-mdio") || - of_device_is_compatible(np, "ucc_geth_phy")) { - u32 id; - static u32 mii_mng_master; - - tbipa = ®s->utbipar; - - if ((err = get_ucc_id_for_range(addr, addr + size, &id))) - goto err_free_irqs; + new_bus->parent = &pdev->dev; + dev_set_drvdata(&pdev->dev, new_bus); - if (!mii_mng_master) { - mii_mng_master = id; - ucc_set_qe_mux_mii_mng(id - 1); + if (data->get_tbipa) { + for_each_child_of_node(np, tbi) { + if (strcmp(tbi->type, "tbi-phy") == 0) { + dev_dbg(&pdev->dev, "found TBI PHY node %s\n", + strrchr(tbi->full_name, '/') + 1); + break; + } } - } else { - err = -ENODEV; - goto err_free_irqs; - } - for_each_child_of_node(np, tbi) { - if (!strncmp(tbi->type, "tbi-phy", 8)) - break; - } + if (tbi) { + const u32 *prop = of_get_property(tbi, "reg", NULL); + uint32_t __iomem *tbipa; - if (tbi) { - const u32 *prop = of_get_property(tbi, "reg", NULL); + if (!prop) { + dev_err(&pdev->dev, + "missing 'reg' property in node %s\n", + tbi->full_name); + err = -EBUSY; + goto error; + } - if (prop) - tbiaddr = *prop; + tbipa = data->get_tbipa(priv->map); - if (tbiaddr == -1) { - err = -EBUSY; - goto err_free_irqs; - } else { - out_be32(tbipa, tbiaddr); + out_be32(tbipa, be32_to_cpup(prop)); } } + if (data->ucc_configure) + data->ucc_configure(res.start, res.end); + err = of_mdiobus_register(new_bus, np); if (err) { - printk (KERN_ERR "%s: Cannot register as MDIO bus\n", - new_bus->name); - goto err_free_irqs; + dev_err(&pdev->dev, "cannot register %s as MDIO bus\n", + new_bus->name); + goto error; } return 0; -err_free_irqs: - kfree(new_bus->irq); -err_unmap_regs: - iounmap(priv->map); -err_free_bus: +error: + if (priv->map) + iounmap(priv->map); + kfree(new_bus); -err_free_priv: - kfree(priv); + return err; } -static int fsl_pq_mdio_remove(struct platform_device *ofdev) +static int fsl_pq_mdio_remove(struct platform_device *pdev) { - struct device *device = &ofdev->dev; + struct device *device = &pdev->dev; struct mii_bus *bus = dev_get_drvdata(device); struct fsl_pq_mdio_priv *priv = bus->priv; @@ -406,41 +471,11 @@ static int fsl_pq_mdio_remove(struct platform_device *ofdev) dev_set_drvdata(device, NULL); iounmap(priv->map); - bus->priv = NULL; mdiobus_free(bus); - kfree(priv); return 0; } -static struct of_device_id fsl_pq_mdio_match[] = { - { - .type = "mdio", - .compatible = "ucc_geth_phy", - }, - { - .type = "mdio", - .compatible = "gianfar", - }, - { - .compatible = "fsl,ucc-mdio", - }, - { - .compatible = "fsl,gianfar-tbi", - }, - { - .compatible = "fsl,gianfar-mdio", - }, - { - .compatible = "fsl,etsec2-tbi", - }, - { - .compatible = "fsl,etsec2-mdio", - }, - {}, -}; -MODULE_DEVICE_TABLE(of, fsl_pq_mdio_match); - static struct platform_driver fsl_pq_mdio_driver = { .driver = { .name = "fsl-pq_mdio", diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.h b/drivers/net/ethernet/freescale/fsl_pq_mdio.h deleted file mode 100644 index bd17a2a0139..00000000000 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Freescale PowerQUICC MDIO Driver -- MII Management Bus Implementation - * Driver for the MDIO bus controller on Freescale PowerQUICC processors - * - * Author: Andy Fleming - * Modifier: Sandeep Gopalpet - * - * Copyright 2002-2004, 2008-2009 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#ifndef __FSL_PQ_MDIO_H -#define __FSL_PQ_MDIO_H - -#define MIIMIND_BUSY 0x00000001 -#define MIIMIND_NOTVALID 0x00000004 -#define MIIMCFG_INIT_VALUE 0x00000007 -#define MIIMCFG_RESET 0x80000000 - -#define MII_READ_COMMAND 0x00000001 - -struct fsl_pq_mdio { - u8 res1[16]; - u32 ieventm; /* MDIO Interrupt event register (for etsec2)*/ - u32 imaskm; /* MDIO Interrupt mask register (for etsec2)*/ - u8 res2[4]; - u32 emapm; /* MDIO Event mapping register (for etsec2)*/ - u8 res3[1280]; - u32 miimcfg; /* MII management configuration reg */ - u32 miimcom; /* MII management command reg */ - u32 miimadd; /* MII management address reg */ - u32 miimcon; /* MII management control reg */ - u32 miimstat; /* MII management status reg */ - u32 miimind; /* MII management indication reg */ - u8 reserved[28]; /* Space holder */ - u32 utbipar; /* TBI phy address reg (only on UCC) */ - u8 res4[2728]; -} __packed; - -int fsl_pq_mdio_read(struct mii_bus *bus, int mii_id, int regnum); -int fsl_pq_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value); -int fsl_pq_local_mdio_write(struct fsl_pq_mdio __iomem *regs, int mii_id, - int regnum, u16 value); -int fsl_pq_local_mdio_read(struct fsl_pq_mdio __iomem *regs, int mii_id, int regnum); -int __init fsl_pq_mdio_init(void); -void fsl_pq_mdio_exit(void); -void fsl_pq_mdio_bus_name(char *name, struct device_node *np); -#endif /* FSL_PQ_MDIO_H */ diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d3233f59a82..a1b52ec3b93 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -100,7 +100,6 @@ #include <linux/of_net.h> #include "gianfar.h" -#include "fsl_pq_mdio.h" #define TX_TIMEOUT (1*HZ) @@ -395,7 +394,13 @@ static void gfar_init_mac(struct net_device *ndev) if (ndev->features & NETIF_F_IP_CSUM) tctrl |= TCTRL_INIT_CSUM; - tctrl |= TCTRL_TXSCHED_PRIO; + if (priv->prio_sched_en) + tctrl |= TCTRL_TXSCHED_PRIO; + else { + tctrl |= TCTRL_TXSCHED_WRRS; + gfar_write(®s->tr03wt, DEFAULT_WRRS_WEIGHT); + gfar_write(®s->tr47wt, DEFAULT_WRRS_WEIGHT); + } gfar_write(®s->tctrl, tctrl); @@ -1161,6 +1166,9 @@ static int gfar_probe(struct platform_device *ofdev) priv->rx_filer_enable = 1; /* Enable most messages by default */ priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1; + /* use pritority h/w tx queue scheduling for single queue devices */ + if (priv->num_tx_queues == 1) + priv->prio_sched_en = 1; /* Carrier starts down, phylib will bring it up */ netif_carrier_off(dev); diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 2136c7ff5e6..4141ef2ddaf 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -301,8 +301,16 @@ extern const char gfar_driver_version[]; #define TCTRL_TFCPAUSE 0x00000008 #define TCTRL_TXSCHED_MASK 0x00000006 #define TCTRL_TXSCHED_INIT 0x00000000 +/* priority scheduling */ #define TCTRL_TXSCHED_PRIO 0x00000002 +/* weighted round-robin scheduling (WRRS) */ #define TCTRL_TXSCHED_WRRS 0x00000004 +/* default WRRS weight and policy setting, + * tailored to the tr03wt and tr47wt registers: + * equal weight for all Tx Qs, measured in 64byte units + */ +#define DEFAULT_WRRS_WEIGHT 0x18181818 + #define TCTRL_INIT_CSUM (TCTRL_TUCSEN | TCTRL_IPCSEN) #define IEVENT_INIT_CLEAR 0xffffffff @@ -1098,7 +1106,8 @@ struct gfar_private { extended_hash:1, bd_stash_en:1, rx_filer_enable:1, - wol_en:1; /* Wake-on-LAN enabled */ + wol_en:1, /* Wake-on-LAN enabled */ + prio_sched_en:1; /* Enable priorty based Tx scheduling in Hw */ unsigned short padding; /* PHY stuff */ diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c index 0daa66b8eca..b9db0e04056 100644 --- a/drivers/net/ethernet/freescale/gianfar_ptp.c +++ b/drivers/net/ethernet/freescale/gianfar_ptp.c @@ -510,7 +510,7 @@ static int gianfar_ptp_probe(struct platform_device *dev) spin_unlock_irqrestore(&etsects->lock, flags); - etsects->clock = ptp_clock_register(&etsects->caps); + etsects->clock = ptp_clock_register(&etsects->caps, &dev->dev); if (IS_ERR(etsects->clock)) { err = PTR_ERR(etsects->clock); goto no_clock; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 21c6574c5f1..16428843922 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -42,7 +42,6 @@ #include <asm/machdep.h> #include "ucc_geth.h" -#include "fsl_pq_mdio.h" #undef DEBUG diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c new file mode 100644 index 00000000000..1afb5ea2a98 --- /dev/null +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -0,0 +1,274 @@ +/* + * QorIQ 10G MDIO Controller + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * Authors: Andy Fleming <afleming@freescale.com> + * Timur Tabi <timur@freescale.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/phy.h> +#include <linux/mdio.h> +#include <linux/of_platform.h> +#include <linux/of_mdio.h> + +/* Number of microseconds to wait for a register to respond */ +#define TIMEOUT 1000 + +struct tgec_mdio_controller { + __be32 reserved[12]; + __be32 mdio_stat; /* MDIO configuration and status */ + __be32 mdio_ctl; /* MDIO control */ + __be32 mdio_data; /* MDIO data */ + __be32 mdio_addr; /* MDIO address */ +} __packed; + +#define MDIO_STAT_CLKDIV(x) (((x>>1) & 0xff) << 8) +#define MDIO_STAT_BSY (1 << 0) +#define MDIO_STAT_RD_ER (1 << 1) +#define MDIO_CTL_DEV_ADDR(x) (x & 0x1f) +#define MDIO_CTL_PORT_ADDR(x) ((x & 0x1f) << 5) +#define MDIO_CTL_PRE_DIS (1 << 10) +#define MDIO_CTL_SCAN_EN (1 << 11) +#define MDIO_CTL_POST_INC (1 << 14) +#define MDIO_CTL_READ (1 << 15) + +#define MDIO_DATA(x) (x & 0xffff) +#define MDIO_DATA_BSY (1 << 31) + +/* + * Wait untill the MDIO bus is free + */ +static int xgmac_wait_until_free(struct device *dev, + struct tgec_mdio_controller __iomem *regs) +{ + uint32_t status; + + /* Wait till the bus is free */ + status = spin_event_timeout( + !((in_be32(®s->mdio_stat)) & MDIO_STAT_BSY), TIMEOUT, 0); + if (!status) { + dev_err(dev, "timeout waiting for bus to be free\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * Wait till the MDIO read or write operation is complete + */ +static int xgmac_wait_until_done(struct device *dev, + struct tgec_mdio_controller __iomem *regs) +{ + uint32_t status; + + /* Wait till the MDIO write is complete */ + status = spin_event_timeout( + !((in_be32(®s->mdio_data)) & MDIO_DATA_BSY), TIMEOUT, 0); + if (!status) { + dev_err(dev, "timeout waiting for operation to complete\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/* + * Write value to the PHY for this device to the register at regnum,waiting + * until the write is done before it returns. All PHY configuration has to be + * done through the TSEC1 MIIM regs. + */ +static int xgmac_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + uint16_t dev_addr = regnum >> 16; + int ret; + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Set the port and dev addr */ + out_be32(®s->mdio_ctl, + MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr)); + + /* Set the register address */ + out_be32(®s->mdio_addr, regnum & 0xffff); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Write the value to the register */ + out_be32(®s->mdio_data, MDIO_DATA(value)); + + ret = xgmac_wait_until_done(&bus->dev, regs); + if (ret) + return ret; + + return 0; +} + +/* + * Reads from register regnum in the PHY for device dev, returning the value. + * Clears miimcom first. All PHY configuration has to be done through the + * TSEC1 MIIM regs. + */ +static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + uint16_t dev_addr = regnum >> 16; + uint32_t mdio_ctl; + uint16_t value; + int ret; + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Set the Port and Device Addrs */ + mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr); + out_be32(®s->mdio_ctl, mdio_ctl); + + /* Set the register address */ + out_be32(®s->mdio_addr, regnum & 0xffff); + + ret = xgmac_wait_until_free(&bus->dev, regs); + if (ret) + return ret; + + /* Initiate the read */ + out_be32(®s->mdio_ctl, mdio_ctl | MDIO_CTL_READ); + + ret = xgmac_wait_until_done(&bus->dev, regs); + if (ret) + return ret; + + /* Return all Fs if nothing was there */ + if (in_be32(®s->mdio_stat) & MDIO_STAT_RD_ER) { + dev_err(&bus->dev, "MDIO read error\n"); + return 0xffff; + } + + value = in_be32(®s->mdio_data) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", value); + + return value; +} + +/* Reset the MIIM registers, and wait for the bus to free */ +static int xgmac_mdio_reset(struct mii_bus *bus) +{ + struct tgec_mdio_controller __iomem *regs = bus->priv; + int ret; + + mutex_lock(&bus->mdio_lock); + + /* Setup the MII Mgmt clock speed */ + out_be32(®s->mdio_stat, MDIO_STAT_CLKDIV(100)); + + ret = xgmac_wait_until_free(&bus->dev, regs); + + mutex_unlock(&bus->mdio_lock); + + return ret; +} + +static int __devinit xgmac_mdio_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct mii_bus *bus; + struct resource res; + int ret; + + ret = of_address_to_resource(np, 0, &res); + if (ret) { + dev_err(&pdev->dev, "could not obtain address\n"); + return ret; + } + + bus = mdiobus_alloc_size(PHY_MAX_ADDR * sizeof(int)); + if (!bus) + return -ENOMEM; + + bus->name = "Freescale XGMAC MDIO Bus"; + bus->read = xgmac_mdio_read; + bus->write = xgmac_mdio_write; + bus->reset = xgmac_mdio_reset; + bus->irq = bus->priv; + bus->parent = &pdev->dev; + snprintf(bus->id, MII_BUS_ID_SIZE, "%llx", (unsigned long long)res.start); + + /* Set the PHY base address */ + bus->priv = of_iomap(np, 0); + if (!bus->priv) { + ret = -ENOMEM; + goto err_ioremap; + } + + ret = of_mdiobus_register(bus, np); + if (ret) { + dev_err(&pdev->dev, "cannot register MDIO bus\n"); + goto err_registration; + } + + dev_set_drvdata(&pdev->dev, bus); + + return 0; + +err_registration: + iounmap(bus->priv); + +err_ioremap: + mdiobus_free(bus); + + return ret; +} + +static int __devexit xgmac_mdio_remove(struct platform_device *pdev) +{ + struct mii_bus *bus = dev_get_drvdata(&pdev->dev); + + mdiobus_unregister(bus); + iounmap(bus->priv); + mdiobus_free(bus); + + return 0; +} + +static struct of_device_id xgmac_mdio_match[] = { + { + .compatible = "fsl,fman-xmdio", + }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgmac_mdio_match); + +static struct platform_driver xgmac_mdio_driver = { + .driver = { + .name = "fsl-fman_xmdio", + .of_match_table = xgmac_mdio_match, + }, + .probe = xgmac_mdio_probe, + .remove = xgmac_mdio_remove, +}; + +module_platform_driver(xgmac_mdio_driver); + +MODULE_DESCRIPTION("Freescale QorIQ 10G MDIO Controller"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/i825xx/Kconfig b/drivers/net/ethernet/i825xx/Kconfig index fed5080a6b6..959faf7388e 100644 --- a/drivers/net/ethernet/i825xx/Kconfig +++ b/drivers/net/ethernet/i825xx/Kconfig @@ -150,7 +150,7 @@ config SUN3_82586 config ZNET tristate "Zenith Z-Note support (EXPERIMENTAL)" - depends on EXPERIMENTAL && ISA_DMA_API + depends on EXPERIMENTAL && ISA_DMA_API && X86 ---help--- The Zenith Z-Note notebook computer has a built-in network (Ethernet) card, and this is the Linux driver for it. Note that the diff --git a/drivers/net/ethernet/i825xx/znet.c b/drivers/net/ethernet/i825xx/znet.c index ba4e0cea350..c9479e081b8 100644 --- a/drivers/net/ethernet/i825xx/znet.c +++ b/drivers/net/ethernet/i825xx/znet.c @@ -865,14 +865,14 @@ static void hardware_init(struct net_device *dev) disable_dma(znet->rx_dma); /* reset by an interrupting task. */ clear_dma_ff(znet->rx_dma); set_dma_mode(znet->rx_dma, DMA_RX_MODE); - set_dma_addr(znet->rx_dma, (unsigned int) znet->rx_start); + set_dma_addr(znet->rx_dma, isa_virt_to_bus(znet->rx_start)); set_dma_count(znet->rx_dma, RX_BUF_SIZE); enable_dma(znet->rx_dma); /* Now set up the Tx channel. */ disable_dma(znet->tx_dma); clear_dma_ff(znet->tx_dma); set_dma_mode(znet->tx_dma, DMA_TX_MODE); - set_dma_addr(znet->tx_dma, (unsigned int) znet->tx_start); + set_dma_addr(znet->tx_dma, isa_virt_to_bus(znet->tx_start)); set_dma_count(znet->tx_dma, znet->tx_buf_len<<1); enable_dma(znet->tx_dma); release_dma_lock(flags); diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 535f94fac4a..29ce9bd27f9 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -3157,7 +3157,7 @@ static void e100_io_resume(struct pci_dev *pdev) } } -static struct pci_error_handlers e100_err_handler = { +static const struct pci_error_handlers e100_err_handler = { .error_detected = e100_io_error_detected, .slot_reset = e100_io_slot_reset, .resume = e100_io_resume, diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 736a7d987db..9089d00f142 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -174,6 +174,20 @@ static int e1000_get_settings(struct net_device *netdev, ecmd->autoneg = ((hw->media_type == e1000_media_type_fiber) || hw->autoneg) ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 1; MDI => 0 */ + if ((hw->media_type == e1000_media_type_copper) && + netif_carrier_ok(netdev)) + ecmd->eth_tp_mdix = (!!adapter->phy_info.mdix_mode ? + ETH_TP_MDI_X : + ETH_TP_MDI); + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->mdix; return 0; } @@ -183,6 +197,22 @@ static int e1000_set_settings(struct net_device *netdev, struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err(drv, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -199,12 +229,21 @@ static int e1000_set_settings(struct net_device *netdev, ecmd->advertising = hw->autoneg_advertised; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->flags); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->mdix = AUTO_ALL_MODES; + else + hw->mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index bde337ee1a3..222bfaff462 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -192,7 +192,7 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev); static void e1000_io_resume(struct pci_dev *pdev); -static struct pci_error_handlers e1000_err_handler = { +static const struct pci_error_handlers e1000_err_handler = { .error_detected = e1000_io_error_detected, .slot_reset = e1000_io_slot_reset, .resume = e1000_io_resume, @@ -2014,6 +2014,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, e1000_unmap_and_free_tx_resource(adapter, buffer_info); } + netdev_reset_queue(adapter->netdev); size = sizeof(struct e1000_buffer) * tx_ring->count; memset(tx_ring->buffer_info, 0, size); @@ -3273,6 +3274,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags, mss); if (count) { + netdev_sent_queue(netdev, skb->len); skb_tx_timestamp(skb); e1000_tx_queue(adapter, tx_ring, tx_flags, count); @@ -3860,6 +3862,7 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, unsigned int i, eop; unsigned int count = 0; unsigned int total_tx_bytes=0, total_tx_packets=0; + unsigned int bytes_compl = 0, pkts_compl = 0; i = tx_ring->next_to_clean; eop = tx_ring->buffer_info[i].next_to_watch; @@ -3877,6 +3880,11 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, if (cleaned) { total_tx_packets += buffer_info->segs; total_tx_bytes += buffer_info->bytecount; + if (buffer_info->skb) { + bytes_compl += buffer_info->skb->len; + pkts_compl++; + } + } e1000_unmap_and_free_tx_resource(adapter, buffer_info); tx_desc->upper.data = 0; @@ -3890,6 +3898,8 @@ static bool e1000_clean_tx_irq(struct e1000_adapter *adapter, tx_ring->next_to_clean = i; + netdev_completed_queue(netdev, pkts_compl, bytes_compl); + #define TX_WAKE_THRESHOLD 32 if (unlikely(count && netif_carrier_ok(netdev) && E1000_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD)) { @@ -4950,6 +4960,10 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + hw->mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 080c89093fe..c9858640800 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -653,7 +653,7 @@ static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) **/ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) { - u16 data = er32(POEMB); + u32 data = er32(POEMB); if (active) data |= E1000_PHY_CTRL_D0A_LPLU; @@ -677,7 +677,7 @@ static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, bool active) **/ static s32 e1000_set_d3_lplu_state_82574(struct e1000_hw *hw, bool active) { - u16 data = er32(POEMB); + u32 data = er32(POEMB); if (!active) { data &= ~E1000_PHY_CTRL_NOND0A_LPLU; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 0349e2478df..c11ac275666 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -199,6 +199,11 @@ static int e1000_get_settings(struct net_device *netdev, else ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -241,6 +246,10 @@ static int e1000_set_spd_dplx(struct e1000_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: @@ -264,6 +273,22 @@ static int e1000_set_settings(struct net_device *netdev, return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + e_err("forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) usleep_range(1000, 2000); @@ -282,20 +307,32 @@ static int e1000_set_settings(struct net_device *netdev, hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (e1000_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__E1000_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { e1000e_down(adapter); e1000e_up(adapter); - } else { + } else e1000e_reset(adapter); - } clear_bit(__E1000_RESETTING, &adapter->state); return 0; @@ -1905,7 +1942,8 @@ static int e1000_set_coalesce(struct net_device *netdev, return -EINVAL; if (ec->rx_coalesce_usecs == 4) { - adapter->itr = adapter->itr_setting = 4; + adapter->itr_setting = 4; + adapter->itr = adapter->itr_setting; } else if (ec->rx_coalesce_usecs <= 3) { adapter->itr = 20000; adapter->itr_setting = ec->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index d01a099475a..fb659dd8db0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -56,7 +56,7 @@ #define DRV_EXTRAVERSION "-k" -#define DRV_VERSION "2.0.0" DRV_EXTRAVERSION +#define DRV_VERSION "2.1.4" DRV_EXTRAVERSION char e1000e_driver_name[] = "e1000e"; const char e1000e_driver_version[] = DRV_VERSION; @@ -3446,7 +3446,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /* * if short on Rx space, Rx wins and must trump Tx - * adjustment or use Early Receive if available + * adjustment */ if (pba < min_rx_space) pba = min_rx_space; @@ -3755,6 +3755,10 @@ static irqreturn_t e1000_intr_msi_test(int irq, void *data) e_dbg("icr is %08X\n", icr); if (icr & E1000_ICR_RXSEQ) { adapter->flags &= ~FLAG_MSI_TEST_FAILED; + /* + * Force memory writes to complete before acknowledging the + * interrupt is handled. + */ wmb(); } @@ -3796,6 +3800,10 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) goto msi_test_failed; } + /* + * Force memory writes to complete before enabling and firing an + * interrupt. + */ wmb(); e1000_irq_enable(adapter); @@ -3807,7 +3815,7 @@ static int e1000_test_msi_interrupt(struct e1000_adapter *adapter) e1000_irq_disable(adapter); - rmb(); + rmb(); /* read flags after interrupt has been fired */ if (adapter->flags & FLAG_MSI_TEST_FAILED) { adapter->int_mode = E1000E_INT_MODE_LEGACY; @@ -4670,7 +4678,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) struct e1000_buffer *buffer_info; unsigned int i; u32 cmd_length = 0; - u16 ipcse = 0, tucse, mss; + u16 ipcse = 0, mss; u8 ipcss, ipcso, tucss, tucso, hdr_len; if (!skb_is_gso(skb)) @@ -4704,7 +4712,6 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data; tucss = skb_transport_offset(skb); tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data; - tucse = 0; cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE | E1000_TXD_CMD_TCP | (skb->len - (hdr_len))); @@ -4718,7 +4725,7 @@ static int e1000_tso(struct e1000_ring *tx_ring, struct sk_buff *skb) context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse); context_desc->upper_setup.tcp_fields.tucss = tucss; context_desc->upper_setup.tcp_fields.tucso = tucso; - context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse); + context_desc->upper_setup.tcp_fields.tucse = 0; context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss); context_desc->tcp_seg_setup.fields.hdr_len = hdr_len; context_desc->cmd_and_length = cpu_to_le32(cmd_length); @@ -5584,16 +5591,15 @@ static void e1000_complete_shutdown(struct pci_dev *pdev, bool sleep, */ if (adapter->flags & FLAG_IS_QUAD_PORT) { struct pci_dev *us_dev = pdev->bus->self; - int pos = pci_pcie_cap(us_dev); u16 devctl; - pci_read_config_word(us_dev, pos + PCI_EXP_DEVCTL, &devctl); - pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, - (devctl & ~PCI_EXP_DEVCTL_CERE)); + pcie_capability_read_word(us_dev, PCI_EXP_DEVCTL, &devctl); + pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, + (devctl & ~PCI_EXP_DEVCTL_CERE)); e1000_power_off(pdev, sleep, wake); - pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, devctl); + pcie_capability_write_word(us_dev, PCI_EXP_DEVCTL, devctl); } else { e1000_power_off(pdev, sleep, wake); } @@ -5607,25 +5613,15 @@ static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) #else static void __e1000e_disable_aspm(struct pci_dev *pdev, u16 state) { - int pos; - u16 reg16; - /* * Both device and parent should have the same ASPM setting. * Disable ASPM in downstream component first and then upstream. */ - pos = pci_pcie_cap(pdev); - pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, ®16); - reg16 &= ~state; - pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, reg16); - - if (!pdev->bus->self) - return; + pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, state); - pos = pci_pcie_cap(pdev->bus->self); - pci_read_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, ®16); - reg16 &= ~state; - pci_write_config_word(pdev->bus->self, pos + PCI_EXP_LNKCTL, reg16); + if (pdev->bus->self) + pcie_capability_clear_word(pdev->bus->self, PCI_EXP_LNKCTL, + state); } #endif static void e1000e_disable_aspm(struct pci_dev *pdev, u16 state) @@ -6486,7 +6482,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev) } /* PCI Error Recovery (ERS) */ -static struct pci_error_handlers e1000_err_handler = { +static const struct pci_error_handlers e1000_err_handler = { .error_detected = e1000_io_error_detected, .slot_reset = e1000_io_slot_reset, .resume = e1000_io_resume, diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c index b860d4f7ea2..fc62a3f3a5b 100644 --- a/drivers/net/ethernet/intel/e1000e/phy.c +++ b/drivers/net/ethernet/intel/e1000e/phy.c @@ -84,8 +84,9 @@ static const u16 e1000_igp_2_cable_length_table[] = { #define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 /* I82577 PHY Control 2 */ -#define I82577_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82577_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82577 PHY Diagnostics Status */ #define I82577_DSTATUS_CABLE_LENGTH 0x03FC @@ -702,6 +703,32 @@ s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) if (ret_val) return ret_val; + /* Set MDI/MDIX mode */ + ret_val = e1e_rphy(hw, I82577_PHY_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = e1e_wphy(hw, I82577_PHY_CTRL_2, phy_data); + if (ret_val) + return ret_val; + return e1000_set_master_slave_mode(hw); } diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index ba994fb4cec..ca4641e2f74 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -2223,11 +2223,10 @@ out: s32 igb_set_eee_i350(struct e1000_hw *hw) { s32 ret_val = 0; - u32 ipcnfg, eeer, ctrl_ext; + u32 ipcnfg, eeer; - ctrl_ext = rd32(E1000_CTRL_EXT); - if ((hw->mac.type != e1000_i350) || - (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK)) + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) goto out; ipcnfg = rd32(E1000_IPCNFG); eeer = rd32(E1000_EEER); @@ -2240,6 +2239,14 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) E1000_EEER_RX_LPI_EN | E1000_EEER_LPI_FC); + /* keep the LPI clock running before EEE is enabled */ + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) { + u32 eee_su; + eee_su = rd32(E1000_EEE_SU); + eee_su &= ~E1000_EEE_SU_LPI_CLK_STP; + wr32(E1000_EEE_SU, eee_su); + } + } else { ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); @@ -2249,6 +2256,8 @@ s32 igb_set_eee_i350(struct e1000_hw *hw) } wr32(E1000_IPCNFG, ipcnfg); wr32(E1000_EEER, eeer); + rd32(E1000_IPCNFG); + rd32(E1000_EEER); out: return ret_val; diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index ec7e4fe3e3e..de4b41ec3c4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -322,6 +322,9 @@ #define E1000_FCRTC_RTH_COAL_SHIFT 4 #define E1000_PCIEMISC_LX_DECISION 0x00000080 /* Lx power decision */ +/* Timestamp in Rx buffer */ +#define E1000_RXPBS_CFG_TS_EN 0x80000000 + /* SerDes Control */ #define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 @@ -360,6 +363,7 @@ #define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */ #define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */ #define E1000_ICR_VMMB 0x00000100 /* VM MB event */ +#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ #define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ /* If this bit asserted, the driver should claim the interrupt */ #define E1000_ICR_INT_ASSERTED 0x80000000 @@ -399,6 +403,7 @@ #define E1000_IMS_TXDW E1000_ICR_TXDW /* Transmit desc written back */ #define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ #define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ +#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ #define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* rx sequence error */ #define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */ #define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */ @@ -510,6 +515,9 @@ #define E1000_TIMINCA_16NS_SHIFT 24 +#define E1000_TSICR_TXTS 0x00000002 +#define E1000_TSIM_TXTS 0x00000002 + #define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ #define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ #define E1000_MDICNFG_PHY_MASK 0x03E00000 @@ -849,8 +857,9 @@ #define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* EEE Enable 100M AN */ #define E1000_EEER_TX_LPI_EN 0x00010000 /* EEE Tx LPI Enable */ #define E1000_EEER_RX_LPI_EN 0x00020000 /* EEE Rx LPI Enable */ -#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ +#define E1000_EEER_FRC_AN 0x10000000 /* Enable EEE in loopback */ #define E1000_EEER_LPI_FC 0x00040000 /* EEE Enable on FC */ +#define E1000_EEE_SU_LPI_CLK_STP 0X00800000 /* EEE LPI Clock Stop */ /* SerDes Control */ #define E1000_GEN_CTL_READY 0x80000000 diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.c b/drivers/net/ethernet/intel/igb/e1000_phy.c index 7be98b6f105..3404bc79f4c 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.c +++ b/drivers/net/ethernet/intel/igb/e1000_phy.c @@ -464,6 +464,32 @@ s32 igb_copper_link_setup_82580(struct e1000_hw *hw) phy_data |= I82580_CFG_ENABLE_DOWNSHIFT; ret_val = phy->ops.write_reg(hw, I82580_CFG_REG, phy_data); + if (ret_val) + goto out; + + /* Set MDI/MDIX mode */ + ret_val = phy->ops.read_reg(hw, I82580_PHY_CTRL_2, &phy_data); + if (ret_val) + goto out; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; + /* + * Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82580_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82580_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = hw->phy.ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); out: return ret_val; @@ -2246,8 +2272,7 @@ s32 igb_phy_force_speed_duplex_82580(struct e1000_hw *hw) if (ret_val) goto out; - phy_data &= ~I82580_PHY_CTRL2_AUTO_MDIX; - phy_data &= ~I82580_PHY_CTRL2_FORCE_MDI_MDIX; + phy_data &= ~I82580_PHY_CTRL2_MDIX_CFG_MASK; ret_val = phy->ops.write_reg(hw, I82580_PHY_CTRL_2, phy_data); if (ret_val) diff --git a/drivers/net/ethernet/intel/igb/e1000_phy.h b/drivers/net/ethernet/intel/igb/e1000_phy.h index 34e40619f16..6ac3299bfcb 100644 --- a/drivers/net/ethernet/intel/igb/e1000_phy.h +++ b/drivers/net/ethernet/intel/igb/e1000_phy.h @@ -111,8 +111,9 @@ s32 igb_check_polarity_m88(struct e1000_hw *hw); #define I82580_PHY_STATUS2_SPEED_100MBPS 0x0100 /* I82580 PHY Control 2 */ -#define I82580_PHY_CTRL2_AUTO_MDIX 0x0400 -#define I82580_PHY_CTRL2_FORCE_MDI_MDIX 0x0200 +#define I82580_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82580_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82580_PHY_CTRL2_MDIX_CFG_MASK 0x0600 /* I82580 PHY Diagnostics Status */ #define I82580_DSTATUS_CABLE_LENGTH 0x03FC diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 28394bea525..e5db48594e8 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -91,6 +91,8 @@ #define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ #define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ #define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ +#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ /* Filtering Registers */ #define E1000_SAQF(_n) (0x5980 + 4 * (_n)) @@ -347,6 +349,7 @@ /* Energy Efficient Ethernet "EEE" register */ #define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ #define E1000_EEER 0x0E30 /* Energy Efficient Ethernet */ +#define E1000_EEE_SU 0X0E34 /* EEE Setup */ /* Thermal Sensor Register */ #define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 9e572dd29ab..8aad230c059 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -34,9 +34,11 @@ #include "e1000_mac.h" #include "e1000_82575.h" +#ifdef CONFIG_IGB_PTP #include <linux/clocksource.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#endif /* CONFIG_IGB_PTP */ #include <linux/bitops.h> #include <linux/if_vlan.h> @@ -99,7 +101,6 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. */ u16 pf_qos; u16 tx_rate; - struct pci_dev *vfdev; }; #define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ @@ -131,9 +132,9 @@ struct vf_data_storage { #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 /* Supported Rx Buffer Sizes */ -#define IGB_RXBUFFER_512 512 +#define IGB_RXBUFFER_256 256 #define IGB_RXBUFFER_16384 16384 -#define IGB_RX_HDR_LEN IGB_RXBUFFER_512 +#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 /* How many Tx Descriptors do we need to call netif_wake_queue ? */ #define IGB_TX_QUEUE_WAKE 16 @@ -167,8 +168,8 @@ struct igb_tx_buffer { unsigned int bytecount; u16 gso_segs; __be16 protocol; - dma_addr_t dma; - u32 length; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); u32 tx_flags; }; @@ -212,7 +213,6 @@ struct igb_q_vector { struct igb_ring_container rx, tx; struct napi_struct napi; - int numa_node; u16 itr_val; u8 set_itr; @@ -257,7 +257,6 @@ struct igb_ring { }; /* Items past this point are only used during ring alloc / free */ dma_addr_t dma; /* phys address of the ring */ - int numa_node; /* node to alloc ring memory on */ }; enum e1000_ring_flags_t { @@ -342,7 +341,6 @@ struct igb_adapter { /* OS defined structs */ struct pci_dev *pdev; - struct hwtstamp_config hwtstamp_config; spinlock_t stats64_lock; struct rtnl_link_stats64 stats64; @@ -373,15 +371,19 @@ struct igb_adapter { int vf_rate_link_speed; u32 rss_queues; u32 wvbr; - int node; u32 *shadow_vfta; +#ifdef CONFIG_IGB_PTP struct ptp_clock *ptp_clock; - struct ptp_clock_info caps; - struct delayed_work overflow_work; + struct ptp_clock_info ptp_caps; + struct delayed_work ptp_overflow_work; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; +#endif /* CONFIG_IGB_PTP */ + char fw_version[32]; }; @@ -390,6 +392,7 @@ struct igb_adapter { #define IGB_FLAG_QUAD_PORT_A (1 << 2) #define IGB_FLAG_QUEUE_PAIRS (1 << 3) #define IGB_FLAG_DMAC (1 << 4) +#define IGB_FLAG_PTP (1 << 5) /* DMA Coalescing defines */ #define IGB_MIN_TXPBSIZE 20408 @@ -435,13 +438,17 @@ extern void igb_power_up_link(struct igb_adapter *); extern void igb_set_fw_version(struct igb_adapter *); #ifdef CONFIG_IGB_PTP extern void igb_ptp_init(struct igb_adapter *adapter); -extern void igb_ptp_remove(struct igb_adapter *adapter); - -extern void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim); +extern void igb_ptp_stop(struct igb_adapter *adapter); +extern void igb_ptp_reset(struct igb_adapter *adapter); +extern void igb_ptp_tx_work(struct work_struct *work); +extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); +extern void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb); +extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd); +#endif /* CONFIG_IGB_PTP */ -#endif static inline s32 igb_reset_phy(struct e1000_hw *hw) { if (hw->phy.ops.reset) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 70591117051..2ea01284982 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -148,9 +148,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) SUPPORTED_100baseT_Full | SUPPORTED_1000baseT_Full| SUPPORTED_Autoneg | - SUPPORTED_TP); - ecmd->advertising = (ADVERTISED_TP | - ADVERTISED_Pause); + SUPPORTED_TP | + SUPPORTED_Pause); + ecmd->advertising = ADVERTISED_TP; if (hw->mac.autoneg == 1) { ecmd->advertising |= ADVERTISED_Autoneg; @@ -158,6 +158,21 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) ecmd->advertising |= hw->phy.autoneg_advertised; } + if (hw->mac.autoneg != 1) + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + if (hw->fc.requested_mode == e1000_fc_full) + ecmd->advertising |= ADVERTISED_Pause; + else if (hw->fc.requested_mode == e1000_fc_rx_pause) + ecmd->advertising |= (ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + else if (hw->fc.requested_mode == e1000_fc_tx_pause) + ecmd->advertising |= ADVERTISED_Asym_Pause; + else + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + ecmd->port = PORT_TP; ecmd->phy_address = hw->phy.addr; } else { @@ -198,6 +213,19 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) } ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == e1000_media_type_copper) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + return 0; } @@ -214,6 +242,22 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) return -EINVAL; } + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) msleep(1); @@ -227,12 +271,25 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) hw->fc.requested_mode = e1000_fc_default; } else { u32 speed = ethtool_cmd_speed(ecmd); + /* calling this overrides forced MDI setting */ if (igb_set_spd_dplx(adapter, speed, ecmd->duplex)) { clear_bit(__IGB_RESETTING, &adapter->state); return -EINVAL; } } + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* + * fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + /* reset the link */ if (netif_running(adapter->netdev)) { igb_down(adapter); @@ -1469,33 +1526,22 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 ctrl_reg = 0; - u16 phy_reg = 0; hw->mac.autoneg = false; - switch (hw->phy.type) { - case e1000_phy_m88: - /* Auto-MDI/MDIX Off */ - igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); - /* reset to update Auto-MDI/MDIX */ - igb_write_phy_reg(hw, PHY_CONTROL, 0x9140); - /* autoneg off */ - igb_write_phy_reg(hw, PHY_CONTROL, 0x8140); - break; - case e1000_phy_82580: - /* enable MII loopback */ - igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041); - break; - case e1000_phy_i210: - /* set loopback speed in PHY */ - igb_read_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2), - &phy_reg); - phy_reg |= GS40G_MAC_SPEED_1G; - igb_write_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2), - phy_reg); - ctrl_reg = rd32(E1000_CTRL_EXT); - default: - break; + if (hw->phy.type == e1000_phy_m88) { + if (hw->phy.id != I210_I_PHY_ID) { + /* Auto-MDI/MDIX Off */ + igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); + /* reset to update Auto-MDI/MDIX */ + igb_write_phy_reg(hw, PHY_CONTROL, 0x9140); + /* autoneg off */ + igb_write_phy_reg(hw, PHY_CONTROL, 0x8140); + } else { + /* force 1000, set loopback */ + igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + igb_write_phy_reg(hw, PHY_CONTROL, 0x4140); + } } /* add small delay to avoid loopback test failure */ @@ -1513,7 +1559,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) E1000_CTRL_FD | /* Force Duplex to FULL */ E1000_CTRL_SLU); /* Set link up enable bit */ - if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210)) + if (hw->phy.type == e1000_phy_m88) ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ wr32(E1000_CTRL, ctrl_reg); @@ -1521,11 +1567,10 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter) /* Disable the receiver on the PHY so when a cable is plugged in, the * PHY does not begin to autoneg when a cable is reconnected to the NIC. */ - if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210)) + if (hw->phy.type == e1000_phy_m88) igb_phy_disable_receiver(adapter); - udelay(500); - + mdelay(500); return 0; } @@ -1785,13 +1830,6 @@ static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) *data = 0; goto out; } - if ((adapter->hw.mac.type == e1000_i210) - || (adapter->hw.mac.type == e1000_i211)) { - dev_err(&adapter->pdev->dev, - "Loopback test not supported on this part at this time.\n"); - *data = 0; - goto out; - } *data = igb_setup_desc_rings(adapter); if (*data) goto out; @@ -2257,6 +2295,54 @@ static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static int igb_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (adapter->hw.mac.type) { +#ifdef CONFIG_IGB_PTP + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i210: + case e1000_i211: + info->so_timestamping = + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = 1 << HWTSTAMP_FILTER_NONE; + + /* 82576 does not support timestamping all packets. */ + if (adapter->hw.mac.type >= e1000_82580) + info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL; + else + info->rx_filters |= + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +#endif /* CONFIG_IGB_PTP */ + default: + return -EOPNOTSUPP; + } +} + static int igb_ethtool_begin(struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); @@ -2270,38 +2356,6 @@ static void igb_ethtool_complete(struct net_device *netdev) pm_runtime_put(&adapter->pdev->dev); } -#ifdef CONFIG_IGB_PTP -static int igb_ethtool_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) -{ - struct igb_adapter *adapter = netdev_priv(dev); - - info->so_timestamping = - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; - - if (adapter->ptp_clock) - info->phc_index = ptp_clock_index(adapter->ptp_clock); - else - info->phc_index = -1; - - info->tx_types = - (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON); - - info->rx_filters = - (1 << HWTSTAMP_FILTER_NONE) | - (1 << HWTSTAMP_FILTER_ALL) | - (1 << HWTSTAMP_FILTER_SOME) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | - (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | - (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); - - return 0; -} - -#endif static const struct ethtool_ops igb_ethtool_ops = { .get_settings = igb_get_settings, .set_settings = igb_set_settings, @@ -2328,11 +2382,9 @@ static const struct ethtool_ops igb_ethtool_ops = { .get_ethtool_stats = igb_get_ethtool_stats, .get_coalesce = igb_get_coalesce, .set_coalesce = igb_set_coalesce, + .get_ts_info = igb_get_ts_info, .begin = igb_ethtool_begin, .complete = igb_ethtool_complete, -#ifdef CONFIG_IGB_PTP - .get_ts_info = igb_ethtool_get_ts_info, -#endif }; void igb_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 48cc4fb1a30..e1ceb37ef12 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -172,8 +172,7 @@ static void igb_check_vf_rate_limit(struct igb_adapter *); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); -static int igb_find_enabled_vfs(struct igb_adapter *adapter); -static int igb_check_vf_assignment(struct igb_adapter *adapter); +static bool igb_vfs_are_assigned(struct igb_adapter *adapter); #endif #ifdef CONFIG_PM @@ -217,7 +216,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *, static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); static void igb_io_resume(struct pci_dev *); -static struct pci_error_handlers igb_err_handler = { +static const struct pci_error_handlers igb_err_handler = { .error_detected = igb_io_error_detected, .slot_reset = igb_io_slot_reset, .resume = igb_io_resume, @@ -404,8 +403,8 @@ static void igb_dump(struct igb_adapter *adapter) buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp); } @@ -456,8 +455,8 @@ static void igb_dump(struct igb_adapter *adapter) " %04X %p %016llX %p%s\n", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), - (u64)buffer_info->dma, - buffer_info->length, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), buffer_info->next_to_watch, (u64)buffer_info->time_stamp, buffer_info->skb, next_desc); @@ -466,7 +465,8 @@ static void igb_dump(struct igb_adapter *adapter) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 1, buffer_info->skb->data, - buffer_info->length, true); + dma_unmap_len(buffer_info, len), + true); } } @@ -683,52 +683,29 @@ static int igb_alloc_queues(struct igb_adapter *adapter) { struct igb_ring *ring; int i; - int orig_node = adapter->node; for (i = 0; i < adapter->num_tx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->tx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* For 82575, context index must be unique per ring. */ if (adapter->hw.mac.type == e1000_82575) set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); adapter->tx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; for (i = 0; i < adapter->num_rx_queues; i++) { - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL, - adapter->node); - if (!ring) - ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); + ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL); if (!ring) goto err; ring->count = adapter->rx_ring_count; ring->queue_index = i; ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; - ring->numa_node = adapter->node; /* set flag indicating ring supports SCTP checksum offload */ if (adapter->hw.mac.type >= e1000_82576) set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); @@ -742,16 +719,12 @@ static int igb_alloc_queues(struct igb_adapter *adapter) adapter->rx_ring[i] = ring; } - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_cache_ring_register(adapter); return 0; err: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_queues(adapter); return -ENOMEM; @@ -1117,24 +1090,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) struct igb_q_vector *q_vector; struct e1000_hw *hw = &adapter->hw; int v_idx; - int orig_node = adapter->node; for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { - if ((adapter->num_q_vectors == (adapter->num_rx_queues + - adapter->num_tx_queues)) && - (adapter->num_rx_queues == v_idx)) - adapter->node = orig_node; - if (orig_node == -1) { - int cur_node = next_online_node(adapter->node); - if (cur_node == MAX_NUMNODES) - cur_node = first_online_node; - adapter->node = cur_node; - } - q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL, - adapter->node); - if (!q_vector) - q_vector = kzalloc(sizeof(struct igb_q_vector), - GFP_KERNEL); + q_vector = kzalloc(sizeof(struct igb_q_vector), + GFP_KERNEL); if (!q_vector) goto err_out; q_vector->adapter = adapter; @@ -1143,14 +1102,10 @@ static int igb_alloc_q_vectors(struct igb_adapter *adapter) netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64); adapter->q_vector[v_idx] = q_vector; } - /* Restore the adapter's original node */ - adapter->node = orig_node; return 0; err_out: - /* Restore the adapter's original node */ - adapter->node = orig_node; igb_free_q_vectors(adapter); return -ENOMEM; } @@ -1751,6 +1706,11 @@ void igb_reset(struct igb_adapter *adapter) /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); +#ifdef CONFIG_IGB_PTP + /* Re-enable PTP, where applicable. */ + igb_ptp_reset(adapter); +#endif /* CONFIG_IGB_PTP */ + igb_get_phy_info(hw); } @@ -2180,11 +2140,12 @@ static int __devinit igb_probe(struct pci_dev *pdev, } #endif + #ifdef CONFIG_IGB_PTP /* do hw tstamp init after resetting */ igb_ptp_init(adapter); +#endif /* CONFIG_IGB_PTP */ -#endif dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n", @@ -2259,9 +2220,9 @@ static void __devexit igb_remove(struct pci_dev *pdev) pm_runtime_get_noresume(&pdev->dev); #ifdef CONFIG_IGB_PTP - igb_ptp_remove(adapter); + igb_ptp_stop(adapter); +#endif /* CONFIG_IGB_PTP */ -#endif /* * The watchdog timer may be rescheduled, so explicitly * disable watchdog from being rescheduled. @@ -2294,11 +2255,11 @@ static void __devexit igb_remove(struct pci_dev *pdev) /* reclaim resources allocated to VFs */ if (adapter->vf_data) { /* disable iov and allow time for transactions to clear */ - if (!igb_check_vf_assignment(adapter)) { + if (igb_vfs_are_assigned(adapter)) { + dev_info(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n"); + } else { pci_disable_sriov(pdev); msleep(500); - } else { - dev_info(&pdev->dev, "VF(s) assigned to guests!\n"); } kfree(adapter->vf_data); @@ -2338,7 +2299,7 @@ static void __devinit igb_probe_vfs(struct igb_adapter * adapter) #ifdef CONFIG_PCI_IOV struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; - int old_vfs = igb_find_enabled_vfs(adapter); + int old_vfs = pci_num_vf(adapter->pdev); int i; /* Virtualization features not supported on i210 family. */ @@ -2418,8 +2379,6 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - adapter->node = -1; - spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -2666,13 +2625,11 @@ static int igb_close(struct net_device *netdev) int igb_setup_tx_resources(struct igb_ring *tx_ring) { struct device *dev = tx_ring->dev; - int orig_node = dev_to_node(dev); int size; size = sizeof(struct igb_tx_buffer) * tx_ring->count; - tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node); - if (!tx_ring->tx_buffer_info) - tx_ring->tx_buffer_info = vzalloc(size); + + tx_ring->tx_buffer_info = vzalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2680,18 +2637,10 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); tx_ring->size = ALIGN(tx_ring->size, 4096); - set_dev_node(dev, tx_ring->numa_node); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!tx_ring->desc) - tx_ring->desc = dma_alloc_coherent(dev, - tx_ring->size, - &tx_ring->dma, - GFP_KERNEL); - if (!tx_ring->desc) goto err; @@ -2702,8 +2651,8 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) err: vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); return -ENOMEM; } @@ -2820,34 +2769,23 @@ static void igb_configure_tx(struct igb_adapter *adapter) int igb_setup_rx_resources(struct igb_ring *rx_ring) { struct device *dev = rx_ring->dev; - int orig_node = dev_to_node(dev); - int size, desc_len; + int size; size = sizeof(struct igb_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node); - if (!rx_ring->rx_buffer_info) - rx_ring->rx_buffer_info = vzalloc(size); + + rx_ring->rx_buffer_info = vzalloc(size); if (!rx_ring->rx_buffer_info) goto err; - desc_len = sizeof(union e1000_adv_rx_desc); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - set_dev_node(dev, rx_ring->numa_node); rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); - set_dev_node(dev, orig_node); - if (!rx_ring->desc) - rx_ring->desc = dma_alloc_coherent(dev, - rx_ring->size, - &rx_ring->dma, - GFP_KERNEL); - if (!rx_ring->desc) goto err; @@ -2859,8 +2797,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) err: vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - dev_err(dev, "Unable to allocate memory for the receive descriptor" - " ring\n"); + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); return -ENOMEM; } @@ -2898,57 +2835,48 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; u32 mrqc, rxcsum; - u32 j, num_rx_queues, shift = 0, shift2 = 0; - union e1000_reta { - u32 dword; - u8 bytes[4]; - } reta; - static const u8 rsshash[40] = { - 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, - 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, - 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, - 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; + u32 j, num_rx_queues, shift = 0; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; /* Fill out hash function seeds */ - for (j = 0; j < 10; j++) { - u32 rsskey = rsshash[(j * 4)]; - rsskey |= rsshash[(j * 4) + 1] << 8; - rsskey |= rsshash[(j * 4) + 2] << 16; - rsskey |= rsshash[(j * 4) + 3] << 24; - array_wr32(E1000_RSSRK(0), j, rsskey); - } + for (j = 0; j < 10; j++) + wr32(E1000_RSSRK(j), rsskey[j]); num_rx_queues = adapter->rss_queues; - if (adapter->vfs_allocated_count) { - /* 82575 and 82576 supports 2 RSS queues for VMDq */ - switch (hw->mac.type) { - case e1000_i350: - case e1000_82580: - num_rx_queues = 1; - shift = 0; - break; - case e1000_82576: + switch (hw->mac.type) { + case e1000_82575: + shift = 6; + break; + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count) { shift = 3; num_rx_queues = 2; - break; - case e1000_82575: - shift = 2; - shift2 = 6; - default: - break; } - } else { - if (hw->mac.type == e1000_82575) - shift = 6; + break; + default: + break; } - for (j = 0; j < (32 * 4); j++) { - reta.bytes[j & 3] = (j % num_rx_queues) << shift; - if (shift2) - reta.bytes[j & 3] |= num_rx_queues << shift2; - if ((j & 3) == 3) - wr32(E1000_RETA(j >> 2), reta.dword); + /* + * Populate the indirection table 4 entries at a time. To do this + * we are generating the results for n and n+2 and then interleaving + * those with the results with n+1 and n+3. + */ + for (j = 0; j < 32; j++) { + /* first pass generates n and n+2 */ + u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; + u32 reta = (base & 0x07800780) >> (7 - shift); + + /* second pass generates n+1 and n+3 */ + base += 0x00010001 * num_rx_queues; + reta |= (base & 0x07800780) << (1 + shift); + + wr32(E1000_RETA(j), reta); } /* @@ -3184,8 +3112,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter, srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT; #endif srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +#ifdef CONFIG_IGB_PTP if (hw->mac.type >= e1000_82580) srrctl |= E1000_SRRCTL_TIMESTAMP; +#endif /* CONFIG_IGB_PTP */ /* Only set Drop Enable if we are supporting multiple queues */ if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1) srrctl |= E1000_SRRCTL_DROP_EN; @@ -3269,20 +3199,20 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *ring, { if (tx_buffer->skb) { dev_kfree_skb_any(tx_buffer->skb); - if (tx_buffer->dma) + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); - } else if (tx_buffer->dma) { + } else if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; - tx_buffer->dma = 0; + dma_unmap_len_set(tx_buffer, len, 0); /* buffer_info must be completely set up in the transmit path */ } @@ -4229,9 +4159,11 @@ static __le32 igb_tx_cmd_type(u32 tx_flags) if (tx_flags & IGB_TX_FLAGS_VLAN) cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); +#ifdef CONFIG_IGB_PTP /* set timestamp bit if present */ - if (tx_flags & IGB_TX_FLAGS_TSTAMP) + if (unlikely(tx_flags & IGB_TX_FLAGS_TSTAMP)) cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); +#endif /* CONFIG_IGB_PTP */ /* set segmentation bits for TSO */ if (tx_flags & IGB_TX_FLAGS_TSO) @@ -4275,7 +4207,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, const u8 hdr_len) { struct sk_buff *skb = first->skb; - struct igb_tx_buffer *tx_buffer_info; + struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; dma_addr_t dma; struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; @@ -4296,8 +4228,8 @@ static void igb_tx_map(struct igb_ring *tx_ring, goto dma_error; /* record length, and DMA address */ - first->length = size; - first->dma = dma; + dma_unmap_len_set(first, len, size); + dma_unmap_addr_set(first, dma, dma); tx_desc->read.buffer_addr = cpu_to_le64(dma); for (;;) { @@ -4339,9 +4271,9 @@ static void igb_tx_map(struct igb_ring *tx_ring, if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - tx_buffer_info->length = size; - tx_buffer_info->dma = dma; + tx_buffer = &tx_ring->tx_buffer_info[i]; + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); tx_desc->read.olinfo_status = 0; tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -4392,9 +4324,9 @@ dma_error: /* clear dma mappings for failed tx_buffer_info map */ for (;;) { - tx_buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); - if (tx_buffer_info == first) + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) break; if (i == 0) i = tx_ring->count; @@ -4440,6 +4372,9 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { +#ifdef CONFIG_IGB_PTP + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); +#endif /* CONFIG_IGB_PTP */ struct igb_tx_buffer *first; int tso; u32 tx_flags = 0; @@ -4462,10 +4397,17 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { +#ifdef CONFIG_IGB_PTP + if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + !(adapter->ptp_tx_skb))) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); } +#endif /* CONFIG_IGB_PTP */ if (vlan_tx_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; @@ -4661,11 +4603,13 @@ void igb_update_stats(struct igb_adapter *adapter, bytes = 0; packets = 0; for (i = 0; i < adapter->num_rx_queues; i++) { - u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF; + u32 rqdpc = rd32(E1000_RQDPC(i)); struct igb_ring *ring = adapter->rx_ring[i]; - ring->rx_stats.drops += rqdpc_tmp; - net_stats->rx_fifo_errors += rqdpc_tmp; + if (rqdpc) { + ring->rx_stats.drops += rqdpc; + net_stats->rx_fifo_errors += rqdpc; + } do { start = u64_stats_fetch_begin_bh(&ring->rx_syncp); @@ -4755,7 +4699,11 @@ void igb_update_stats(struct igb_adapter *adapter, reg = rd32(E1000_CTRL_EXT); if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { adapter->stats.rxerrc += rd32(E1000_RXERRC); - adapter->stats.tncrs += rd32(E1000_TNCRS); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += rd32(E1000_TNCRS); } adapter->stats.tsctc += rd32(E1000_TSCTC); @@ -4852,6 +4800,19 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; @@ -5002,102 +4963,43 @@ static int igb_notify_dca(struct notifier_block *nb, unsigned long event, static int igb_vf_configure(struct igb_adapter *adapter, int vf) { unsigned char mac_addr[ETH_ALEN]; - struct pci_dev *pdev = adapter->pdev; - struct e1000_hw *hw = &adapter->hw; - struct pci_dev *pvfdev; - unsigned int device_id; - u16 thisvf_devfn; eth_random_addr(mac_addr); igb_set_vf_mac(adapter, vf, mac_addr); - switch (adapter->hw.mac.type) { - case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) | - (pdev->devfn & 1); - break; - case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) | - (pdev->devfn & 3); - break; - default: - device_id = 0; - thisvf_devfn = 0; - break; - } - - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == thisvf_devfn) - break; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); - } - - if (pvfdev) - adapter->vf_data[vf].vfdev = pvfdev; - else - dev_err(&pdev->dev, - "Couldn't find pci dev ptr for VF %4.4x\n", - thisvf_devfn); - return pvfdev != NULL; + return 0; } -static int igb_find_enabled_vfs(struct igb_adapter *adapter) +static bool igb_vfs_are_assigned(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct pci_dev *pvfdev; - u16 vf_devfn = 0; - u16 vf_stride; - unsigned int device_id; - int vfs_found = 0; + struct pci_dev *vfdev; + int dev_id; switch (adapter->hw.mac.type) { case e1000_82576: - device_id = IGB_82576_VF_DEV_ID; - /* VF Stride for 82576 is 2 */ - vf_stride = 2; + dev_id = IGB_82576_VF_DEV_ID; break; case e1000_i350: - device_id = IGB_I350_VF_DEV_ID; - /* VF Stride for I350 is 4 */ - vf_stride = 4; + dev_id = IGB_I350_VF_DEV_ID; break; default: - device_id = 0; - vf_stride = 0; - break; - } - - vf_devfn = pdev->devfn + 0x80; - pvfdev = pci_get_device(hw->vendor_id, device_id, NULL); - while (pvfdev) { - if (pvfdev->devfn == vf_devfn && - (pvfdev->bus->number >= pdev->bus->number)) - vfs_found++; - vf_devfn += vf_stride; - pvfdev = pci_get_device(hw->vendor_id, - device_id, pvfdev); + return false; } - return vfs_found; -} - -static int igb_check_vf_assignment(struct igb_adapter *adapter) -{ - int i; - for (i = 0; i < adapter->vfs_allocated_count; i++) { - if (adapter->vf_data[i].vfdev) { - if (adapter->vf_data[i].vfdev->dev_flags & - PCI_DEV_FLAGS_ASSIGNED) + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL); + while (vfdev) { + /* if we don't own it we don't care */ + if (vfdev->is_virtfn && vfdev->physfn == pdev) { + /* if it is assigned we cannot release it */ + if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) return true; } + + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, vfdev); } + return false; } @@ -5643,6 +5545,19 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5684,6 +5599,19 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } +#ifdef CONFIG_IGB_PTP + if (icr & E1000_ICR_TS) { + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* CONFIG_IGB_PTP */ + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -5743,37 +5671,6 @@ static int igb_poll(struct napi_struct *napi, int budget) return 0; } -#ifdef CONFIG_IGB_PTP -/** - * igb_tx_hwtstamp - utility function which checks for TX time stamp - * @q_vector: pointer to q_vector containing needed info - * @buffer: pointer to igb_tx_buffer structure - * - * If we were asked to do hardware stamping and such a time stamp is - * available, then it must have been for this skb here because we only - * allow only one such packet into the queue. - */ -static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, - struct igb_tx_buffer *buffer_info) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - struct skb_shared_hwtstamps shhwtstamps; - u64 regval; - - /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || - !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) - return; - - regval = rd32(E1000_TXSTMPL); - regval |= (u64)rd32(E1000_TXSTMPH) << 32; - - igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(buffer_info->skb, &shhwtstamps); -} - -#endif /** * igb_clean_tx_irq - Reclaim resources after transmit completes * @q_vector: pointer to q_vector containing needed info @@ -5785,7 +5682,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx.ring; struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx.work_limit; unsigned int i = tx_ring->next_to_clean; @@ -5797,16 +5694,16 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_desc = IGB_TX_DESC(tx_ring, i); i -= tx_ring->count; - for (; budget; budget--) { - eop_desc = tx_buffer->next_to_watch; - - /* prevent any other reads prior to eop_desc */ - rmb(); + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + rmb(); + /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) break; @@ -5818,25 +5715,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) total_bytes += tx_buffer->bytecount; total_packets += tx_buffer->gso_segs; -#ifdef CONFIG_IGB_PTP - /* retrieve hardware timestamp */ - igb_tx_hwtstamp(q_vector, tx_buffer); - -#endif /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; /* unmap skb header data */ dma_unmap_single(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* clear last DMA location and unmap remaining buffers */ while (tx_desc != eop_desc) { - tx_buffer->dma = 0; - tx_buffer++; tx_desc++; i++; @@ -5847,17 +5740,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) } /* unmap any remaining paged data */ - if (tx_buffer->dma) { + if (dma_unmap_len(tx_buffer, len)) { dma_unmap_page(tx_ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); } } - /* clear last DMA location */ - tx_buffer->dma = 0; - /* move us one more past the eop_desc for start of next pkt */ tx_buffer++; tx_desc++; @@ -5867,7 +5758,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } - } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); @@ -5883,12 +5780,10 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { struct e1000_hw *hw = &adapter->hw; - eop_desc = tx_buffer->next_to_watch; - /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); - if (eop_desc && + if (tx_buffer->next_to_watch && time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5912,9 +5807,9 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_ring->next_to_use, tx_ring->next_to_clean, tx_buffer->time_stamp, - eop_desc, + tx_buffer->next_to_watch, jiffies, - eop_desc->wb.status); + tx_buffer->next_to_watch->wb.status); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); @@ -5994,47 +5889,6 @@ static inline void igb_rx_hash(struct igb_ring *ring, skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); } -#ifdef CONFIG_IGB_PTP -static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, - union e1000_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - struct igb_adapter *adapter = q_vector->adapter; - struct e1000_hw *hw = &adapter->hw; - u64 regval; - - if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | - E1000_RXDADV_STAT_TS)) - return; - - /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. - * - * If nothing went wrong, then it should have a shared tx_flags that we - * can turn into a skb_shared_hwtstamps. - */ - if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - u32 *stamp = (u32 *)skb->data; - regval = le32_to_cpu(*(stamp + 2)); - regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; - skb_pull(skb, IGB_TS_HDR_LEN); - } else { - if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) - return; - - regval = rd32(E1000_RXSTMPL); - regval |= (u64)rd32(E1000_RXSTMPH) << 32; - } - - igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); -} - -#endif static void igb_rx_vlan(struct igb_ring *ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) @@ -6146,8 +6000,8 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) } #ifdef CONFIG_IGB_PTP - igb_rx_hwtstamp(q_vector, rx_desc, skb); -#endif + igb_ptp_rx_hwtstamp(q_vector, rx_desc, skb); +#endif /* CONFIG_IGB_PTP */ igb_rx_hash(rx_ring, rx_desc, skb); igb_rx_checksum(rx_ring, rx_desc, skb); igb_rx_vlan(rx_ring, rx_desc, skb); @@ -6341,181 +6195,6 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) } /** - * igb_hwtstamp_ioctl - control hardware time stamping - * @netdev: - * @ifreq: - * @cmd: - * - * Outgoing time stamping can be enabled and disabled. Play nice and - * disable it when requested, although it shouldn't case any overhead - * when no packet needs it. At most one packet in the queue may be - * marked for time stamping, otherwise it would be impossible to tell - * for sure to which packet the hardware time stamp belongs. - * - * Incoming time stamping has to be configured via the hardware - * filters. Not all combinations are supported, in particular event - * type has to be specified. Matching the kind of event packet is - * not supported, with the exception of "all V2 events regardless of - * level 2 or 4". - * - **/ -static int igb_hwtstamp_ioctl(struct net_device *netdev, - struct ifreq *ifr, int cmd) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct e1000_hw *hw = &adapter->hw; - struct hwtstamp_config config; - u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; - u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - u32 tsync_rx_cfg = 0; - bool is_l4 = false; - bool is_l2 = false; - u32 regval; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - /* reserved for future extensions */ - if (config.flags) - return -EINVAL; - - switch (config.tx_type) { - case HWTSTAMP_TX_OFF: - tsync_tx_ctl = 0; - case HWTSTAMP_TX_ON: - break; - default: - return -ERANGE; - } - - switch (config.rx_filter) { - case HWTSTAMP_FILTER_NONE: - tsync_rx_ctl = 0; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_ALL: - /* - * register TSYNCRXCFG must be set, therefore it is not - * possible to time stamp both Sync and Delay_Req messages - * => fall back to time stamping all packets - */ - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - config.rx_filter = HWTSTAMP_FILTER_ALL; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; - is_l4 = true; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: - case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_SYNC: - case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; - is_l4 = true; - break; - default: - return -ERANGE; - } - - if (hw->mac.type == e1000_82575) { - if (tsync_rx_ctl | tsync_tx_ctl) - return -EINVAL; - return 0; - } - - /* - * Per-packet timestamping only works if all packets are - * timestamped, so enable timestamping in all packets as - * long as one rx filter was configured. - */ - if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { - tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; - tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; - } - - /* enable/disable TX */ - regval = rd32(E1000_TSYNCTXCTL); - regval &= ~E1000_TSYNCTXCTL_ENABLED; - regval |= tsync_tx_ctl; - wr32(E1000_TSYNCTXCTL, regval); - - /* enable/disable RX */ - regval = rd32(E1000_TSYNCRXCTL); - regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); - regval |= tsync_rx_ctl; - wr32(E1000_TSYNCRXCTL, regval); - - /* define which PTP packets are time stamped */ - wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); - - /* define ethertype filter for timestamped packets */ - if (is_l2) - wr32(E1000_ETQF(3), - (E1000_ETQF_FILTER_ENABLE | /* enable filter */ - E1000_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - wr32(E1000_ETQF(3), 0); - -#define PTP_PORT 319 - /* L4 Queue Filter[3]: filter by destination port and protocol */ - if (is_l4) { - u32 ftqf = (IPPROTO_UDP /* UDP */ - | E1000_FTQF_VF_BP /* VF not compared */ - | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ - | E1000_FTQF_MASK); /* mask all inputs */ - ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ - - wr32(E1000_IMIR(3), htons(PTP_PORT)); - wr32(E1000_IMIREXT(3), - (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); - if (hw->mac.type == e1000_82576) { - /* enable source port check */ - wr32(E1000_SPQF(3), htons(PTP_PORT)); - ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; - } - wr32(E1000_FTQF(3), ftqf); - } else { - wr32(E1000_FTQF(3), E1000_FTQF_MASK); - } - wrfl(); - - adapter->hwtstamp_config = config; - - /* clear TX/RX time stamp registers, just to be sure */ - regval = rd32(E1000_TXSTMPH); - regval = rd32(E1000_RXSTMPH); - - return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? - -EFAULT : 0; -} - -/** * igb_ioctl - * @netdev: * @ifreq: @@ -6528,8 +6207,10 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) case SIOCGMIIREG: case SIOCSMIIREG: return igb_mii_ioctl(netdev, ifr, cmd); +#ifdef CONFIG_IGB_PTP case SIOCSHWTSTAMP: - return igb_hwtstamp_ioctl(netdev, ifr, cmd); + return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); +#endif /* CONFIG_IGB_PTP */ default: return -EOPNOTSUPP; } @@ -6538,28 +6219,20 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = adapter->pdev->pcie_cap; - if (!cap_offset) + if (pcie_capability_read_word(adapter->pdev, reg, value)) return -E1000_ERR_CONFIG; - pci_read_config_word(adapter->pdev, cap_offset + reg, value); - return 0; } s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) { struct igb_adapter *adapter = hw->back; - u16 cap_offset; - cap_offset = adapter->pdev->pcie_cap; - if (!cap_offset) + if (pcie_capability_write_word(adapter->pdev, reg, *value)) return -E1000_ERR_CONFIG; - pci_write_config_word(adapter->pdev, cap_offset + reg, *value); - return 0; } @@ -6675,6 +6348,10 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx) default: goto err_inval; } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + return 0; err_inval: diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index c846ea9131a..ee21445157a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -69,22 +69,22 @@ * 2^40 * 10^-9 / 60 = 18.3 minutes. */ -#define IGB_OVERFLOW_PERIOD (HZ * 60 * 9) -#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) -#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) -#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) -#define IGB_NBITS_82580 40 +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) +#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) +#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) +#define IGB_NBITS_82580 40 /* * SYSTIM read access for the 82576 */ -static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; lo = rd32(E1000_SYSTIML); hi = rd32(E1000_SYSTIMH); @@ -99,12 +99,12 @@ static cycle_t igb_82576_systim_read(const struct cyclecounter *cc) * SYSTIM read access for the 82580 */ -static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) +static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) { - u64 val; - u32 lo, hi, jk; struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi, jk; /* * The timestamp latches on lowest register read. For the 82580 @@ -122,16 +122,101 @@ static cycle_t igb_82580_systim_read(const struct cyclecounter *cc) } /* + * SYSTIM read access for I210/I211 + */ + +static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + u32 sec, nsec, jk; + + /* + * The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + jk = rd32(E1000_SYSTIMR); + nsec = rd32(E1000_SYSTIML); + sec = rd32(E1000_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igb_ptp_write_i210(struct igb_adapter *adapter, + const struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * Writing the SYSTIMR register is not necessary as it only provides + * sub-nanosecond resolution. + */ + wr32(E1000_SYSTIML, ts->tv_nsec); + wr32(E1000_SYSTIMH, ts->tv_sec); +} + +/** + * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value. + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + * + * The 'tmreg_lock' spinlock is used to protect the consistency of the + * system time value. This is needed because reading the 64 bit time + * value involves reading two (or three) 32 bit registers. The first + * read latches the value. Ditto for writing. + * + * In addition, here have extended the system time with an overflow + * counter in software. + **/ +static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + unsigned long flags; + u64 ns; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + ns = timecounter_cyc2time(&adapter->tc, systim); + + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); + break; + case e1000_i210: + case e1000_i211: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/* * PTP clock operations */ -static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 incvalue; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -153,13 +238,14 @@ static int ptp_82576_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; u64 rate; u32 inca; - int neg_adj = 0; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); - struct e1000_hw *hw = &igb->hw; if (ppb < 0) { neg_adj = 1; @@ -178,11 +264,12 @@ static int ptp_82580_adjfreq(struct ptp_clock_info *ptp, s32 ppb) return 0; } -static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) +static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) { - s64 now; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + s64 now; spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -195,12 +282,32 @@ static int igb_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } -static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) { + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + struct timespec now, then = ns_to_timespec(delta); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, &now); + now = timespec_add(now, then); + igb_ptp_write_i210(igb, (const struct timespec *)&now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; u64 ns; u32 remainder; - unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); spin_lock_irqsave(&igb->tmreg_lock, flags); @@ -214,11 +321,29 @@ static int igb_gettime(struct ptp_clock_info *ptp, struct timespec *ts) return 0; } -static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) +static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, + struct timespec *ts) { - u64 ns; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); unsigned long flags; - struct igb_adapter *igb = container_of(ptp, struct igb_adapter, caps); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; ns = ts->tv_sec * 1000000000ULL; ns += ts->tv_nsec; @@ -232,77 +357,369 @@ static int igb_settime(struct ptp_clock_info *ptp, const struct timespec *ts) return 0; } -static int ptp_82576_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, + const struct timespec *ts) { - return -EOPNOTSUPP; + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_write_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; } -static int ptp_82580_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int igb_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { return -EOPNOTSUPP; } -static void igb_overflow_check(struct work_struct *work) +/** + * igb_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igb_ptp_tx_work(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, struct igb_adapter, + ptp_tx_work); + struct e1000_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + tsynctxctl = rd32(E1000_TSYNCTXCTL); + if (tsynctxctl & E1000_TSYNCTXCTL_VALID) + igb_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +static void igb_ptp_overflow_check(struct work_struct *work) { - struct timespec ts; struct igb_adapter *igb = - container_of(work, struct igb_adapter, overflow_work.work); + container_of(work, struct igb_adapter, ptp_overflow_work.work); + struct timespec ts; - igb_gettime(&igb->caps, &ts); + igb->ptp_caps.gettime(&igb->ptp_caps, &ts); pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); - schedule_delayed_work(&igb->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&igb->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); +} + +/** + * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure. + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps shhwtstamps; + u64 regval; + + regval = rd32(E1000_TXSTMPL); + regval |= (u64)rd32(E1000_TXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +void igb_ptp_rx_hwtstamp(struct igb_q_vector *q_vector, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + u64 regval; + + if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP | + E1000_RXDADV_STAT_TS)) + return; + + /* + * If this bit is set, then the RX registers contain the time stamp. No + * other packet will be time stamped until we read these registers, so + * read the registers to make them available again. Because only one + * packet can be time stamped at a time, we know that the register + * values must belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared tx_flags that we + * can turn into a skb_shared_hwtstamps. + */ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + u32 *stamp = (u32 *)skb->data; + regval = le32_to_cpu(*(stamp + 2)); + regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32; + skb_pull(skb, IGB_TS_HDR_LEN); + } else { + if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; + + regval = rd32(E1000_RXSTMPL); + regval |= (u64)rd32(E1000_RXSTMPH) << 32; + } + + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); +} + +/** + * igb_ptp_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + **/ +int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * register TSYNCRXCFG must be set, therefore it is not + * possible to time stamp both Sync and Delay_Req messages + * => fall back to time stamping all packets + */ + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE; + is_l2 = true; + is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_SOME; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + default: + return -ERANGE; + } + + if (hw->mac.type == e1000_82575) { + if (tsync_rx_ctl | tsync_tx_ctl) + return -EINVAL; + return 0; + } + + /* + * Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as + * long as one rx filter was configured. + */ + if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { + tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + + if ((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) { + regval = rd32(E1000_RXPBS); + regval |= E1000_RXPBS_CFG_TS_EN; + wr32(E1000_RXPBS, regval); + } + } + + /* enable/disable TX */ + regval = rd32(E1000_TSYNCTXCTL); + regval &= ~E1000_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + wr32(E1000_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = rd32(E1000_TSYNCRXCTL); + regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + wr32(E1000_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + wr32(E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + wr32(E1000_ETQF(3), + (E1000_ETQF_FILTER_ENABLE | /* enable filter */ + E1000_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + wr32(E1000_ETQF(3), 0); + +#define PTP_PORT 319 + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | E1000_FTQF_VF_BP /* VF not compared */ + | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ + | E1000_FTQF_MASK); /* mask all inputs */ + ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + wr32(E1000_IMIR(3), htons(PTP_PORT)); + wr32(E1000_IMIREXT(3), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + if (hw->mac.type == e1000_82576) { + /* enable source port check */ + wr32(E1000_SPQF(3), htons(PTP_PORT)); + ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; + } + wr32(E1000_FTQF(3), ftqf); + } else { + wr32(E1000_FTQF(3), E1000_FTQF_MASK); + } + wrfl(); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = rd32(E1000_TXSTMPL); + regval = rd32(E1000_TXSTMPH); + regval = rd32(E1000_RXSTMPL); + regval = rd32(E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; } void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; switch (hw->mac.type) { - case e1000_i210: - case e1000_i211: - case e1000_i350: + case e1000_82576: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 1000000000; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82576; + adapter->cc.mask = CLOCKSOURCE_MASK(64); + adapter->cc.mult = 1; + adapter->cc.shift = IGB_82576_TSYNC_SHIFT; + /* Dial the nominal frequency. */ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; case e1000_82580: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82580"); - adapter->caps.max_adj = 62499999; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82580_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82580_enable; - adapter->cc.read = igb_82580_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); - adapter->cc.mult = 1; - adapter->cc.shift = 0; + case e1000_i350: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82580; + adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); + adapter->cc.mult = 1; + adapter->cc.shift = 0; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; - - case e1000_82576: - adapter->caps.owner = THIS_MODULE; - strcpy(adapter->caps.name, "igb-82576"); - adapter->caps.max_adj = 1000000000; - adapter->caps.n_ext_ts = 0; - adapter->caps.pps = 0; - adapter->caps.adjfreq = ptp_82576_adjfreq; - adapter->caps.adjtime = igb_adjtime; - adapter->caps.gettime = igb_gettime; - adapter->caps.settime = igb_settime; - adapter->caps.enable = ptp_82576_enable; - adapter->cc.read = igb_82576_systim_read; - adapter->cc.mask = CLOCKSOURCE_MASK(64); - adapter->cc.mult = 1; - adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. */ - wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + case e1000_i210: + case e1000_i211: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; + adapter->ptp_caps.gettime = igb_ptp_gettime_i210; + adapter->ptp_caps.settime = igb_ptp_settime_i210; + adapter->ptp_caps.enable = igb_ptp_enable; + /* Enable the timer functions by clearing bit 31. */ + wr32(E1000_TSAUXC, 0x0); break; - default: adapter->ptp_clock = NULL; return; @@ -310,86 +727,114 @@ void igb_ptp_init(struct igb_adapter *adapter) wrfl(); - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + /* Initialize the clock and overflow work for devices that need it. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - INIT_DELAYED_WORK(&adapter->overflow_work, igb_overflow_check); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); - spin_lock_init(&adapter->tmreg_lock); + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); - schedule_delayed_work(&adapter->overflow_work, IGB_OVERFLOW_PERIOD); + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); + } + + /* Initialize the time sync interrupts for devices that support it. */ + if (hw->mac.type >= e1000_82580) { + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); + } - adapter->ptp_clock = ptp_clock_register(&adapter->caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else + } else { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); + adapter->flags |= IGB_FLAG_PTP; + } } -void igb_ptp_remove(struct igb_adapter *adapter) +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) { switch (adapter->hw.mac.type) { - case e1000_i211: - case e1000_i210: - case e1000_i350: - case e1000_82580: case e1000_82576: - cancel_delayed_work_sync(&adapter->overflow_work); + case e1000_82580: + case e1000_i350: + cancel_delayed_work_sync(&adapter->ptp_overflow_work); + break; + case e1000_i210: + case e1000_i211: + /* No delayed work to cancel. */ break; default: return; } + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); dev_info(&adapter->pdev->dev, "removed PHC on %s\n", adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_PTP; } } /** - * igb_systim_to_hwtstamp - convert system time value to hw timestamp - * @adapter: board private structure - * @hwtstamps: timestamp structure to update - * @systim: unsigned 64bit system time value. - * - * We need to convert the system time value stored in the RX/TXSTMP registers - * into a hwtstamp which can be used by the upper level timestamping functions. + * igb_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. * - * The 'tmreg_lock' spinlock is used to protect the consistency of the - * system time value. This is needed because reading the 64 bit time - * value involves reading two (or three) 32 bit registers. The first - * read latches the value. Ditto for writing. - * - * In addition, here have extended the system time with an overflow - * counter in software. + * This function handles the reset work required to re-enable the PTP device. **/ -void igb_systim_to_hwtstamp(struct igb_adapter *adapter, - struct skb_shared_hwtstamps *hwtstamps, - u64 systim) +void igb_ptp_reset(struct igb_adapter *adapter) { - u64 ns; - unsigned long flags; + struct e1000_hw *hw = &adapter->hw; + + if (!(adapter->flags & IGB_FLAG_PTP)) + return; switch (adapter->hw.mac.type) { + case e1000_82576: + /* Dial the nominal frequency. */ + wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: case e1000_i210: case e1000_i211: - case e1000_i350: - case e1000_82580: - case e1000_82576: + /* Enable the timer functions and interrupts. */ + wr32(E1000_TSAUXC, 0x0); + wr32(E1000_TSIM, E1000_TSIM_TXTS); + wr32(E1000_IMS, E1000_IMS_TS); break; default: + /* No work to do. */ return; } - spin_lock_irqsave(&adapter->tmreg_lock, flags); + /* Re-initialize the timer. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); - ns = timecounter_cyc2time(&adapter->tc, systim); - - spin_unlock_irqrestore(&adapter->tmreg_lock, flags); - - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ns); + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } } diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 0696abfe994..0ac11f527a8 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2833,7 +2833,7 @@ static void __devexit igbvf_remove(struct pci_dev *pdev) } /* PCI Error Recovery (ERS) */ -static struct pci_error_handlers igbvf_err_handler = { +static const struct pci_error_handlers igbvf_err_handler = { .error_detected = igbvf_io_error_detected, .slot_reset = igbvf_io_slot_reset, .resume = igbvf_io_resume, diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index d05fc95befc..d99a2d51b94 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -115,7 +115,7 @@ static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev, static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev); static void ixgb_io_resume (struct pci_dev *pdev); -static struct pci_error_handlers ixgb_err_handler = { +static const struct pci_error_handlers ixgb_err_handler = { .error_detected = ixgb_io_error_detected, .slot_reset = ixgb_io_slot_reset, .resume = ixgb_io_resume, diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 5fd5d04c26c..89f40e51fc1 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -32,7 +32,7 @@ obj-$(CONFIG_IXGBE) += ixgbe.o -ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ +ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o ixgbe_debugfs.o\ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b9623e9ea89..5bd26763554 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -78,6 +78,9 @@ /* Supported Rx Buffer Sizes */ #define IXGBE_RXBUFFER_256 256 /* Used for skb receive header */ +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ /* @@ -104,6 +107,7 @@ #define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) #define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) #define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_NO_IFCS (u32)(1 << 9) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 @@ -293,16 +297,25 @@ struct ixgbe_ring_feature { * this is twice the size of a half page we need to double the page order * for FCoE enabled Rx queues. */ -#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) -static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +static inline unsigned int ixgbe_rx_bufsz(struct ixgbe_ring *ring) { - return test_bit(__IXGBE_RX_FCOE, &ring->state) ? 1 : 0; +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? IXGBE_RXBUFFER_4K : + IXGBE_RXBUFFER_3K; +#endif + return IXGBE_RXBUFFER_2K; } -#else -#define ixgbe_rx_pg_order(_ring) 0 + +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ +#ifdef IXGBE_FCOE + if (test_bit(__IXGBE_RX_FCOE, &ring->state)) + return (PAGE_SIZE < 8192) ? 1 : 0; #endif + return 0; +} #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) -#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) struct ixgbe_ring_container { struct ixgbe_ring *ring; /* pointer to linked list of rings */ @@ -584,6 +597,9 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + struct dentry *ixgbe_dbg_adapter; +#endif /*CONFIG_DEBUG_FS*/ }; struct ixgbe_fdir_filter { @@ -712,7 +728,12 @@ extern int ixgbe_fcoe_get_hbainfo(struct net_device *netdev, struct netdev_fcoe_hbainfo *info); extern u8 ixgbe_fcoe_get_tc(struct ixgbe_adapter *adapter); #endif /* IXGBE_FCOE */ - +#ifdef CONFIG_DEBUG_FS +extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter); +extern void ixgbe_dbg_init(void); +extern void ixgbe_dbg_exit(void); +#endif /* CONFIG_DEBUG_FS */ static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->queue_index); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c new file mode 100644 index 00000000000..8d3a2188909 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c @@ -0,0 +1,300 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List <e1000-devel@lists.sourceforge.net> + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifdef CONFIG_DEBUG_FS + +#include <linux/debugfs.h> +#include <linux/module.h> + +#include "ixgbe.h" + +static struct dentry *ixgbe_dbg_root; + +static char ixgbe_dbg_reg_ops_buf[256] = ""; + +/** + * ixgbe_dbg_reg_ops_open - prep the debugfs pokee data item when opened + * @inode: inode that was opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer where + * we can find it later in the read and write calls + **/ +static int ixgbe_dbg_reg_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_reg_ops_read - read for reg_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_reg_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_reg_ops_write - write into reg_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_reg_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_reg_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_reg_ops_buf, buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_reg_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_reg_ops_buf, "write", 5) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[5], "%x %x", ®, &value); + if (cnt == 2) { + IXGBE_WRITE_REG(&adapter->hw, reg, value); + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("write: 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("write <reg> <value>\n"); + } + } else if (strncmp(ixgbe_dbg_reg_ops_buf, "read", 4) == 0) { + u32 reg, value; + int cnt; + cnt = sscanf(&ixgbe_dbg_reg_ops_buf[4], "%x", ®); + if (cnt == 1) { + value = IXGBE_READ_REG(&adapter->hw, reg); + e_dev_info("read 0x%08x = 0x%08x\n", reg, value); + } else { + e_dev_info("read <reg>\n"); + } + } else { + e_dev_info("Unknown command %s\n", ixgbe_dbg_reg_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" read <reg>\n"); + e_dev_info(" write <reg> <value>\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_reg_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_reg_ops_open, + .read = ixgbe_dbg_reg_ops_read, + .write = ixgbe_dbg_reg_ops_write, +}; + +static char ixgbe_dbg_netdev_ops_buf[256] = ""; + +/** + * ixgbe_dbg_netdev_ops_open - prep the debugfs netdev_ops data item + * @inode: inode that was opened + * @filp: file info + * + * Stash the adapter pointer hiding in the inode into the file pointer + * where we can find it later in the read and write calls + **/ +static int ixgbe_dbg_netdev_ops_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * ixgbe_dbg_netdev_ops_read - read for netdev_ops datum + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_read(struct file *filp, + char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + char buf[256]; + int bytes_not_copied; + int len; + + /* don't allow partial reads */ + if (*ppos != 0) + return 0; + + len = snprintf(buf, sizeof(buf), "%s: %s\n", + adapter->netdev->name, ixgbe_dbg_netdev_ops_buf); + if (count < len) + return -ENOSPC; + bytes_not_copied = copy_to_user(buffer, buf, len); + if (bytes_not_copied < 0) + return bytes_not_copied; + + *ppos = len; + return len; +} + +/** + * ixgbe_dbg_netdev_ops_write - write into netdev_ops datum + * @filp: the opened file + * @buffer: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + **/ +static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct ixgbe_adapter *adapter = filp->private_data; + int bytes_not_copied; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + if (count >= sizeof(ixgbe_dbg_netdev_ops_buf)) + return -ENOSPC; + + bytes_not_copied = copy_from_user(ixgbe_dbg_netdev_ops_buf, + buffer, count); + if (bytes_not_copied < 0) + return bytes_not_copied; + else if (bytes_not_copied < count) + count -= bytes_not_copied; + else + return -ENOSPC; + ixgbe_dbg_netdev_ops_buf[count] = '\0'; + + if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) { + adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev); + e_dev_info("tx_timeout called\n"); + } else { + e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf); + e_dev_info("Available commands:\n"); + e_dev_info(" tx_timeout\n"); + } + return count; +} + +static const struct file_operations ixgbe_dbg_netdev_ops_fops = { + .owner = THIS_MODULE, + .open = ixgbe_dbg_netdev_ops_open, + .read = ixgbe_dbg_netdev_ops_read, + .write = ixgbe_dbg_netdev_ops_write, +}; + +/** + * ixgbe_dbg_adapter_init - setup the debugfs directory for the adapter + * @adapter: the adapter that is starting up + **/ +void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) +{ + const char *name = pci_name(adapter->pdev); + struct dentry *pfile; + adapter->ixgbe_dbg_adapter = debugfs_create_dir(name, ixgbe_dbg_root); + if (adapter->ixgbe_dbg_adapter) { + pfile = debugfs_create_file("reg_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_reg_ops_fops); + if (!pfile) + e_dev_err("debugfs reg_ops for %s failed\n", name); + pfile = debugfs_create_file("netdev_ops", 0600, + adapter->ixgbe_dbg_adapter, adapter, + &ixgbe_dbg_netdev_ops_fops); + if (!pfile) + e_dev_err("debugfs netdev_ops for %s failed\n", name); + } else { + e_dev_err("debugfs entry for %s failed\n", name); + } +} + +/** + * ixgbe_dbg_adapter_exit - clear out the adapter's debugfs entries + * @pf: the pf that is stopping + **/ +void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) +{ + if (adapter->ixgbe_dbg_adapter) + debugfs_remove_recursive(adapter->ixgbe_dbg_adapter); + adapter->ixgbe_dbg_adapter = NULL; +} + +/** + * ixgbe_dbg_init - start up debugfs for the driver + **/ +void ixgbe_dbg_init(void) +{ + ixgbe_dbg_root = debugfs_create_dir(ixgbe_driver_name, NULL); + if (ixgbe_dbg_root == NULL) + pr_err("init of debugfs failed\n"); +} + +/** + * ixgbe_dbg_exit - clean out the driver's debugfs entries + **/ +void ixgbe_dbg_exit(void) +{ + debugfs_remove_recursive(ixgbe_dbg_root); +} + +#endif /* CONFIG_DEBUG_FS */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4326f74f713..868af693821 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1167,7 +1167,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring, } bi->dma = dma; - bi->page_offset ^= ixgbe_rx_bufsz(rx_ring); + bi->page_offset = 0; return true; } @@ -1320,29 +1320,6 @@ static unsigned int ixgbe_get_headlen(unsigned char *data, return max_len; } -static void ixgbe_get_rsc_cnt(struct ixgbe_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - __le32 rsc_enabled; - u32 rsc_cnt; - - if (!ring_is_rsc_enabled(rx_ring)) - return; - - rsc_enabled = rx_desc->wb.lower.lo_dword.data & - cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); - - /* If this is an RSC frame rsc_cnt should be non-zero */ - if (!rsc_enabled) - return; - - rsc_cnt = le32_to_cpu(rsc_enabled); - rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; - - IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; -} - static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring, struct sk_buff *skb) { @@ -1440,16 +1417,28 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, prefetch(IXGBE_RX_DESC(rx_ring, ntc)); - if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; + /* update RSC append count if present */ + if (ring_is_rsc_enabled(rx_ring)) { + __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data & + cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK); + + if (unlikely(rsc_enabled)) { + u32 rsc_cnt = le32_to_cpu(rsc_enabled); + + rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT; + IXGBE_CB(skb)->append_cnt += rsc_cnt - 1; - /* append_cnt indicates packet is RSC, if so fetch nextp */ - if (IXGBE_CB(skb)->append_cnt) { - ntc = le32_to_cpu(rx_desc->wb.upper.status_error); - ntc &= IXGBE_RXDADV_NEXTP_MASK; - ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + /* update ntc based on RSC value */ + ntc = le32_to_cpu(rx_desc->wb.upper.status_error); + ntc &= IXGBE_RXDADV_NEXTP_MASK; + ntc >>= IXGBE_RXDADV_NEXTP_SHIFT; + } } + /* if we are the last buffer then there is nothing else to do */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + /* place skb in next buffer to be received */ rx_ring->rx_buffer_info[ntc].skb = skb; rx_ring->rx_stats.non_eop_descs++; @@ -1458,6 +1447,78 @@ static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, } /** + * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being adjusted + * + * This function is an ixgbe specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB + * @rx_ring: rx descriptor ring packet is being transacted on + * @skb: pointer to current skb being updated + * + * This function provides a basic DMA sync up for the first fragment of an + * skb. The reason for doing this is that the first fragment cannot be + * unmapped until we have reached the end of packet descriptor for a buffer + * chain. + */ +static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, + struct sk_buff *skb) +{ + /* if the page was released unmap it, else just sync our portion */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); + IXGBE_CB(skb)->page_released = false; + } else { + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + + dma_sync_single_range_for_cpu(rx_ring->dev, + IXGBE_CB(skb)->dma, + frag->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + IXGBE_CB(skb)->dma = 0; +} + +/** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor @@ -1479,24 +1540,7 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; struct net_device *netdev = rx_ring->netdev; - unsigned char *va; - unsigned int pull_len; - - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), DMA_FROM_DEVICE); - IXGBE_CB(skb)->page_released = false; - } else { - dma_sync_single_range_for_cpu(rx_ring->dev, - IXGBE_CB(skb)->dma, - frag->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - IXGBE_CB(skb)->dma = 0; /* verify that the packet does not have any known errors */ if (unlikely(ixgbe_test_staterr(rx_desc, @@ -1506,40 +1550,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return true; } - /* - * it is valid to use page_address instead of kmap since we are - * working with pages allocated out of the lomem pool per - * alloc_page(GFP_ATOMIC) - */ - va = skb_frag_address(frag); - - /* - * we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = skb_frag_size(frag); - if (pull_len > IXGBE_RX_HDR_SIZE) - pull_len = ixgbe_get_headlen(va, IXGBE_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; - skb->data_len -= pull_len; - skb->tail += pull_len; - - /* - * if we sucked the frag empty then we should free it, - * if there are other frags here something is screwed up in hardware - */ - if (skb_frag_size(frag) == 0) { - BUG_ON(skb_shinfo(skb)->nr_frags != 1); - skb_shinfo(skb)->nr_frags = 0; - __skb_frag_unref(frag); - skb->truesize -= ixgbe_rx_bufsz(rx_ring); - } + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + ixgbe_pull_tail(rx_ring, skb); #ifdef IXGBE_FCOE /* do not attempt to pad FCoE Frames as this will disrupt DDP */ @@ -1560,33 +1573,17 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, } /** - * ixgbe_can_reuse_page - determine if we can reuse a page - * @rx_buffer: pointer to rx_buffer containing the page we want to reuse - * - * Returns true if page can be reused in another Rx buffer - **/ -static inline bool ixgbe_can_reuse_page(struct ixgbe_rx_buffer *rx_buffer) -{ - struct page *page = rx_buffer->page; - - /* if we are only owner of page and it is local we can reuse it */ - return likely(page_count(page) == 1) && - likely(page_to_nid(page) == numa_node_id()); -} - -/** * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring * @rx_ring: rx descriptor ring to store buffers on * @old_buff: donor buffer to have page reused * - * Syncronizes page for reuse by the adapter + * Synchronizes page for reuse by the adapter **/ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *old_buff) { struct ixgbe_rx_buffer *new_buff; u16 nta = rx_ring->next_to_alloc; - u16 bufsz = ixgbe_rx_bufsz(rx_ring); new_buff = &rx_ring->rx_buffer_info[nta]; @@ -1597,17 +1594,13 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, /* transfer page from old buffer to new buffer */ new_buff->page = old_buff->page; new_buff->dma = old_buff->dma; - - /* flip page offset to other buffer and store to new_buff */ - new_buff->page_offset = old_buff->page_offset ^ bufsz; + new_buff->page_offset = old_buff->page_offset; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma, - new_buff->page_offset, bufsz, + new_buff->page_offset, + ixgbe_rx_bufsz(rx_ring), DMA_FROM_DEVICE); - - /* bump ref count on page before it is given to the stack */ - get_page(new_buff->page); } /** @@ -1617,20 +1610,159 @@ static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring, * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into * - * This function is based on skb_add_rx_frag. I would have used that - * function however it doesn't handle the truesize case correctly since we - * are allocating more memory than might be used for a single receive. + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. **/ -static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, +static bool ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb, int size) + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) { - skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->page, rx_buffer->page_offset, - size); - skb->len += size; - skb->data_len += size; - skb->truesize += ixgbe_rx_bufsz(rx_ring); + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbe_rx_bufsz(rx_ring); +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int last_offset = ixgbe_rx_pg_size(rx_ring) - + ixgbe_rx_bufsz(rx_ring); +#endif + + if ((size <= IXGBE_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= truesize; + + /* + * since we are the only owner of the page and we need to + * increment it, just set the value to 2 in order to avoid + * an unecessary locked operation + */ + atomic_set(&page->_count, 2); +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > last_offset) + return false; + + /* bump ref count on page before it is given to the stack */ + get_page(page); +#endif + + return true; +} + +static struct sk_buff *ixgbe_fetch_rx_buffer(struct ixgbe_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + struct ixgbe_rx_buffer *rx_buffer; + struct sk_buff *skb; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + page = rx_buffer->page; + prefetchw(page); + + skb = rx_buffer->skb; + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IXGBE_RX_HDR_SIZE); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_rx_buff_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + /* + * Delay unmapping of the first packet. It carries the + * header information, HW may still access the header + * after the writeback. Only unmap it when EOP is + * reached + */ + if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + goto dma_sync; + + IXGBE_CB(skb)->dma = rx_buffer->dma; + } else { + if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) + ixgbe_dma_sync_frag(rx_ring, skb); + +dma_sync: + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + ixgbe_rx_bufsz(rx_ring), + DMA_FROM_DEVICE); + } + + /* pull page into skb */ + if (ixgbe_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + ixgbe_reuse_rx_page(rx_ring, rx_buffer); + } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { + /* the page has been released from the ring */ + IXGBE_CB(skb)->page_released = true; + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE); + } + + /* clear contents of buffer_info */ + rx_buffer->skb = NULL; + rx_buffer->dma = 0; + rx_buffer->page = NULL; + + return skb; } /** @@ -1653,16 +1785,14 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; #ifdef IXGBE_FCOE struct ixgbe_adapter *adapter = q_vector->adapter; - int ddp_bytes = 0; + int ddp_bytes; + unsigned int mss = 0; #endif /* IXGBE_FCOE */ u16 cleaned_count = ixgbe_desc_unused(rx_ring); do { - struct ixgbe_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; struct sk_buff *skb; - struct page *page; - u16 ntc; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) { @@ -1670,9 +1800,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, cleaned_count = 0; } - ntc = rx_ring->next_to_clean; - rx_desc = IXGBE_RX_DESC(rx_ring, ntc); - rx_buffer = &rx_ring->rx_buffer_info[ntc]; + rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean); if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_DD)) break; @@ -1684,75 +1812,12 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ rmb(); - page = rx_buffer->page; - prefetchw(page); - - skb = rx_buffer->skb; - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; - - /* prefetch first cache line of first page */ - prefetch(page_addr); -#if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); -#endif + /* retrieve a buffer from the ring */ + skb = ixgbe_fetch_rx_buffer(rx_ring, rx_desc); - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBE_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - break; - } - - /* - * we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - - /* - * Delay unmapping of the first packet. It carries the - * header information, HW may still access the header - * after the writeback. Only unmap it when EOP is - * reached - */ - IXGBE_CB(skb)->dma = rx_buffer->dma; - } else { - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbe_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - } - - /* pull page into skb */ - ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, - le16_to_cpu(rx_desc->wb.upper.length)); - - if (ixgbe_can_reuse_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbe_reuse_rx_page(rx_ring, rx_buffer); - } else if (IXGBE_CB(skb)->dma == rx_buffer->dma) { - /* the page has been released from the ring */ - IXGBE_CB(skb)->page_released = true; - } else { - /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE); - } - - /* clear contents of buffer_info */ - rx_buffer->skb = NULL; - rx_buffer->dma = 0; - rx_buffer->page = NULL; - - ixgbe_get_rsc_cnt(rx_ring, rx_desc, skb); + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; cleaned_count++; @@ -1775,6 +1840,20 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, /* if ddp, not passing to ULD unless for FCP_RSP or error */ if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) { ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb); + /* include DDPed FCoE data */ + if (ddp_bytes > 0) { + if (!mss) { + mss = rx_ring->netdev->mtu - + sizeof(struct fcoe_hdr) - + sizeof(struct fc_frame_header) - + sizeof(struct fcoe_crc_eof); + if (mss > 512) + mss &= ~511; + } + total_rx_bytes += ddp_bytes; + total_rx_packets += DIV_ROUND_UP(ddp_bytes, + mss); + } if (!ddp_bytes) { dev_kfree_skb_any(skb); continue; @@ -1788,21 +1867,6 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, budget--; } while (likely(budget)); -#ifdef IXGBE_FCOE - /* include DDPed FCoE data */ - if (ddp_bytes > 0) { - unsigned int mss; - - mss = rx_ring->netdev->mtu - sizeof(struct fcoe_hdr) - - sizeof(struct fc_frame_header) - - sizeof(struct fcoe_crc_eof); - if (mss > 512) - mss &= ~511; - total_rx_bytes += ddp_bytes; - total_rx_packets += DIV_ROUND_UP(ddp_bytes, mss); - } - -#endif /* IXGBE_FCOE */ u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; @@ -2868,11 +2932,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter, srrctl = IXGBE_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; /* configure the packet buffer length */ -#if PAGE_SIZE > IXGBE_MAX_RXBUFFER - srrctl |= IXGBE_MAX_RXBUFFER >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#else srrctl |= ixgbe_rx_bufsz(rx_ring) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; -#endif /* configure descriptor type */ srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; @@ -2980,13 +3040,7 @@ static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, * total size of max desc * buf_len is not greater * than 65536 */ -#if (PAGE_SIZE <= 8192) rscctrl |= IXGBE_RSCCTL_MAXDESC_16; -#elif (PAGE_SIZE <= 16384) - rscctrl |= IXGBE_RSCCTL_MAXDESC_8; -#else - rscctrl |= IXGBE_RSCCTL_MAXDESC_4; -#endif IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl); } @@ -3606,8 +3660,6 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) if (hw->mac.type == ixgbe_mac_82598EB) netif_set_gso_max_size(adapter->netdev, 32768); - hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); - #ifdef IXGBE_FCOE if (adapter->netdev->features & NETIF_F_FCOE_MTU) max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); @@ -3807,6 +3859,11 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) #ifdef CONFIG_IXGBE_DCB ixgbe_configure_dcb(adapter); #endif + /* + * We must restore virtualization before VLANs or else + * the VLVF registers will not be populated + */ + ixgbe_configure_virtualization(adapter); ixgbe_set_rx_mode(adapter->netdev); ixgbe_restore_vlan(adapter); @@ -3838,8 +3895,6 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) break; } - ixgbe_configure_virtualization(adapter); - #ifdef IXGBE_FCOE /* configure FCoE L2 filters, redirection table, and Rx control */ ixgbe_configure_fcoe(adapter); @@ -4130,27 +4185,6 @@ void ixgbe_reset(struct ixgbe_adapter *adapter) } /** - * ixgbe_init_rx_page_offset - initialize page offset values for Rx buffers - * @rx_ring: ring to setup - * - * On many IA platforms the L1 cache has a critical stride of 4K, this - * results in each receive buffer starting in the same cache set. To help - * reduce the pressure on this cache set we can interleave the offsets so - * that only every other buffer will be in the same cache set. - **/ -static void ixgbe_init_rx_page_offset(struct ixgbe_ring *rx_ring) -{ - struct ixgbe_rx_buffer *rx_buffer = rx_ring->rx_buffer_info; - u16 i; - - for (i = 0; i < rx_ring->count; i += 2) { - rx_buffer[0].page_offset = 0; - rx_buffer[1].page_offset = ixgbe_rx_bufsz(rx_ring); - rx_buffer = &rx_buffer[2]; - } -} - -/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ @@ -4195,8 +4229,6 @@ static void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring) size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; memset(rx_ring->rx_buffer_info, 0, size); - ixgbe_init_rx_page_offset(rx_ring); - /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); @@ -4646,8 +4678,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - ixgbe_init_rx_page_offset(rx_ring); - return 0; err: vfree(rx_ring->rx_buffer_info); @@ -5530,8 +5560,9 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) { u32 ssvpc; - /* Do not perform spoof check for 82598 */ - if (adapter->hw.mac.type == ixgbe_mac_82598EB) + /* Do not perform spoof check for 82598 or if not in IOV mode */ + if (adapter->hw.mac.type == ixgbe_mac_82598EB || + adapter->num_vfs == 0) return; ssvpc = IXGBE_READ_REG(&adapter->hw, IXGBE_SSVPC); @@ -5543,7 +5574,7 @@ static void ixgbe_spoof_check(struct ixgbe_adapter *adapter) if (!ssvpc) return; - e_warn(drv, "%d Spoofed packets detected\n", ssvpc); + e_warn(drv, "%u Spoofed packets detected\n", ssvpc); } /** @@ -5874,9 +5905,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring, u32 type_tucmd = 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { - if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN) && - !(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) - return; + if (!(first->tx_flags & IXGBE_TX_FLAGS_HW_VLAN)) { + if (unlikely(skb->no_fcs)) + first->tx_flags |= IXGBE_TX_FLAGS_NO_IFCS; + if (!(first->tx_flags & IXGBE_TX_FLAGS_TXSW)) + return; + } } else { u8 l4_hdr = 0; switch (first->protocol) { @@ -5938,7 +5972,6 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) { /* set type for advanced descriptor with frame checksum insertion */ __le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT); /* set HW vlan bit if vlan is present */ @@ -5958,6 +5991,10 @@ static __le32 ixgbe_tx_cmd_type(u32 tx_flags) #endif cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE); + /* insert frame checksum */ + if (!(tx_flags & IXGBE_TX_FLAGS_NO_IFCS)) + cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS); + return cmd_type; } @@ -6063,8 +6100,6 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, if (likely(!data_len)) break; - if (unlikely(skb->no_fcs)) - cmd_type &= ~(cpu_to_le32(IXGBE_ADVTXD_DCMD_IFCS)); tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); i++; @@ -6854,9 +6889,9 @@ static int ixgbe_set_features(struct net_device *netdev, return 0; } -static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, +static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, - unsigned char *addr, + const unsigned char *addr, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); @@ -6893,7 +6928,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, static int ixgbe_ndo_fdb_del(struct ndmsg *ndm, struct net_device *dev, - unsigned char *addr) + const unsigned char *addr) { struct ixgbe_adapter *adapter = netdev_priv(dev); int err = -EOPNOTSUPP; @@ -7136,11 +7171,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, goto err_ioremap; } - for (i = 1; i <= 5; i++) { - if (pci_resource_len(pdev, i) == 0) - continue; - } - netdev->netdev_ops = &ixgbe_netdev_ops; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; @@ -7419,6 +7449,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, e_err(probe, "failed to allocate sysfs resources\n"); #endif /* CONFIG_IXGBE_HWMON */ +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_init(adapter); +#endif /* CONFIG_DEBUG_FS */ + return 0; err_register: @@ -7453,6 +7487,10 @@ static void __devexit ixgbe_remove(struct pci_dev *pdev) struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev; +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_adapter_exit(adapter); +#endif /*CONFIG_DEBUG_FS */ + set_bit(__IXGBE_DOWN, &adapter->state); cancel_work_sync(&adapter->service_task); @@ -7527,7 +7565,7 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, goto skip_bad_vf_detection; bdev = pdev->bus->self; - while (bdev && (bdev->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT)) bdev = bdev->bus->self; if (!bdev) @@ -7677,7 +7715,7 @@ static void ixgbe_io_resume(struct pci_dev *pdev) netif_device_attach(netdev); } -static struct pci_error_handlers ixgbe_err_handler = { +static const struct pci_error_handlers ixgbe_err_handler = { .error_detected = ixgbe_io_error_detected, .slot_reset = ixgbe_io_slot_reset, .resume = ixgbe_io_resume, @@ -7708,6 +7746,10 @@ static int __init ixgbe_init_module(void) pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version); pr_info("%s\n", ixgbe_copyright); +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_init(); +#endif /* CONFIG_DEBUG_FS */ + #ifdef CONFIG_IXGBE_DCA dca_register_notify(&dca_notifier); #endif @@ -7730,6 +7772,11 @@ static void __exit ixgbe_exit_module(void) dca_unregister_notify(&dca_notifier); #endif pci_unregister_driver(&ixgbe_driver); + +#ifdef CONFIG_DEBUG_FS + ixgbe_dbg_exit(); +#endif /* CONFIG_DEBUG_FS */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 3456d561714..39881cb17a4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -960,7 +960,8 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) /* (Re)start the overflow check */ adapter->flags2 |= IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED; - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 4fea8716ab6..dce48bf64d9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -346,6 +346,10 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter) static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf) { + /* VLAN 0 is a special case, don't allow it to be removed */ + if (!vid && !add) + return 0; + return adapter->hw.mac.ops.set_vfta(&adapter->hw, vid, vf, (bool)add); } @@ -414,6 +418,7 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf) VLAN_PRIO_SHIFT)), vf); ixgbe_set_vmolr(hw, vf, false); } else { + ixgbe_set_vf_vlan(adapter, true, 0, vf); ixgbe_set_vmvir(adapter, 0, vf); ixgbe_set_vmolr(hw, vf, true); } @@ -810,9 +815,9 @@ out: return err; } -static int ixgbe_link_mbps(int internal_link_speed) +static int ixgbe_link_mbps(struct ixgbe_adapter *adapter) { - switch (internal_link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_100_FULL: return 100; case IXGBE_LINK_SPEED_1GB_FULL: @@ -824,27 +829,30 @@ static int ixgbe_link_mbps(int internal_link_speed) } } -static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, - int link_speed) +static void ixgbe_set_vf_rate_limit(struct ixgbe_adapter *adapter, int vf) { - int rf_dec, rf_int; - u32 bcnrc_val; + struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; + struct ixgbe_hw *hw = &adapter->hw; + u32 bcnrc_val = 0; + u16 queue, queues_per_pool; + u16 tx_rate = adapter->vfinfo[vf].tx_rate; + + if (tx_rate) { + /* start with base link speed value */ + bcnrc_val = adapter->vf_rate_link_speed; - if (tx_rate != 0) { /* Calculate the rate factor values to set */ - rf_int = link_speed / tx_rate; - rf_dec = (link_speed - (rf_int * tx_rate)); - rf_dec = (rf_dec * (1<<IXGBE_RTTBCNRC_RF_INT_SHIFT)) / tx_rate; - - bcnrc_val = IXGBE_RTTBCNRC_RS_ENA; - bcnrc_val |= ((rf_int<<IXGBE_RTTBCNRC_RF_INT_SHIFT) & - IXGBE_RTTBCNRC_RF_INT_MASK); - bcnrc_val |= (rf_dec & IXGBE_RTTBCNRC_RF_DEC_MASK); - } else { - bcnrc_val = 0; + bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT; + bcnrc_val /= tx_rate; + + /* clear everything but the rate factor */ + bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK | + IXGBE_RTTBCNRC_RF_DEC_MASK; + + /* enable the rate scheduler */ + bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA; } - IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, 2*vf); /* vf Y uses queue 2*Y */ /* * Set global transmit compensation time to the MMW_SIZE in RTTBCNRM * register. Typically MMW_SIZE=0x014 if 9728-byte jumbo is supported @@ -861,53 +869,68 @@ static void ixgbe_set_vf_rate_limit(struct ixgbe_hw *hw, int vf, int tx_rate, break; } - IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + /* determine how many queues per pool based on VMDq mask */ + queues_per_pool = __ALIGN_MASK(1, ~vmdq->mask); + + /* write value for all Tx queues belonging to VF */ + for (queue = 0; queue < queues_per_pool; queue++) { + unsigned int reg_idx = (vf * queues_per_pool) + queue; + + IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, reg_idx); + IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val); + } } void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter) { - int actual_link_speed, i; - bool reset_rate = false; + int i; /* VF Tx rate limit was not set */ - if (adapter->vf_rate_link_speed == 0) + if (!adapter->vf_rate_link_speed) return; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if (actual_link_speed != adapter->vf_rate_link_speed) { - reset_rate = true; + if (ixgbe_link_mbps(adapter) != adapter->vf_rate_link_speed) { adapter->vf_rate_link_speed = 0; dev_info(&adapter->pdev->dev, - "Link speed has been changed. VF Transmit rate " - "is disabled\n"); + "Link speed has been changed. VF Transmit rate is disabled\n"); } for (i = 0; i < adapter->num_vfs; i++) { - if (reset_rate) + if (!adapter->vf_rate_link_speed) adapter->vfinfo[i].tx_rate = 0; - ixgbe_set_vf_rate_limit(&adapter->hw, i, - adapter->vfinfo[i].tx_rate, - actual_link_speed); + ixgbe_set_vf_rate_limit(adapter, i); } } int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - int actual_link_speed; + int link_speed; + + /* verify VF is active */ + if (vf >= adapter->num_vfs) + return -EINVAL; - actual_link_speed = ixgbe_link_mbps(adapter->link_speed); - if ((vf >= adapter->num_vfs) || (!adapter->link_up) || - (tx_rate > actual_link_speed) || (actual_link_speed != 10000) || - ((tx_rate != 0) && (tx_rate <= 10))) - /* rate limit cannot be set to 10Mb or less in 10Gb adapters */ + /* verify link is up */ + if (!adapter->link_up) return -EINVAL; - adapter->vf_rate_link_speed = actual_link_speed; - adapter->vfinfo[vf].tx_rate = (u16)tx_rate; - ixgbe_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + /* verify we are linked at 10Gbps */ + link_speed = ixgbe_link_mbps(adapter); + if (link_speed != 10000) + return -EINVAL; + + /* rate limit cannot be less than 10Mbs or greater than link speed */ + if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed))) + return -EINVAL; + + /* store values */ + adapter->vf_rate_link_speed = link_speed; + adapter->vfinfo[vf].tx_rate = tx_rate; + + /* update hardware configuration */ + ixgbe_set_vf_rate_limit(adapter, vf); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 418af827b23..da17ccf5c09 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -272,5 +272,6 @@ struct ixgbe_adv_tx_context_desc { /* Error Codes */ #define IXGBE_ERR_INVALID_MAC_ADDR -1 #define IXGBE_ERR_RESET_FAILED -2 +#define IXGBE_ERR_INVALID_ARGUMENT -3 #endif /* _IXGBEVF_DEFINES_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 98cadb0c4da..383b4e1cd17 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -101,7 +101,9 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_3K 3072 +#define IXGBEVF_RXBUFFER_7K 7168 +#define IXGBEVF_RXBUFFER_15K 15360 #define IXGBEVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 @@ -259,6 +261,11 @@ enum ixbgevf_state_t { __IXGBEVF_DOWN }; +struct ixgbevf_cb { + struct sk_buff *prev; +}; +#define IXGBE_CB(skb) ((struct ixgbevf_cb *)(skb)->cb) + enum ixgbevf_boards { board_82599_vf, board_X540_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 60ef6458741..0ee9bd4819f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -263,6 +263,8 @@ cont_loop: tx_ring->total_bytes += total_bytes; tx_ring->total_packets += total_packets; u64_stats_update_end(&tx_ring->syncp); + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; return count < tx_ring->count; } @@ -272,12 +274,10 @@ cont_loop: * @q_vector: structure containing interrupt and ring information * @skb: packet to send up * @status: hardware indication of status of receive - * @rx_ring: rx descriptor ring (for a specific queue) to setup * @rx_desc: rx descriptor **/ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, struct sk_buff *skb, u8 status, - struct ixgbevf_ring *ring, union ixgbe_adv_rx_desc *rx_desc) { struct ixgbevf_adapter *adapter = q_vector->adapter; @@ -433,11 +433,21 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, if (!(staterr & IXGBE_RXD_STAT_EOP)) { skb->next = next_buffer->skb; - skb->next->prev = skb; + IXGBE_CB(skb->next)->prev = skb; adapter->non_eop_descs++; goto next_desc; } + /* we should not be chaining buffers, if we did drop the skb */ + if (IXGBE_CB(skb)->prev) { + do { + struct sk_buff *this = skb; + skb = IXGBE_CB(skb)->prev; + dev_kfree_skb(this); + } while (skb); + goto next_desc; + } + /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK)) { dev_kfree_skb_irq(skb); @@ -461,7 +471,7 @@ static bool ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } skb->protocol = eth_type_trans(skb, rx_ring->netdev); - ixgbevf_receive_skb(q_vector, skb, staterr, rx_ring, rx_desc); + ixgbevf_receive_skb(q_vector, skb, staterr, rx_desc); next_desc: rx_desc->wb.upper.status_error = 0; @@ -490,6 +500,8 @@ next_desc: rx_ring->total_packets += total_rx_packets; rx_ring->total_bytes += total_rx_bytes; u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; return !!budget; } @@ -716,40 +728,15 @@ static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector) } } -static irqreturn_t ixgbevf_msix_mbx(int irq, void *data) +static irqreturn_t ixgbevf_msix_other(int irq, void *data) { struct ixgbevf_adapter *adapter = data; struct ixgbe_hw *hw = &adapter->hw; - u32 msg; - bool got_ack = false; - - if (!hw->mbx.ops.check_for_ack(hw)) - got_ack = true; - if (!hw->mbx.ops.check_for_msg(hw)) { - hw->mbx.ops.read(hw, &msg, 1); + hw->mac.get_link_status = 1; - if ((msg & IXGBE_MBVFICR_VFREQ_MASK) == IXGBE_PF_CONTROL_MSG) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 1)); - - if (msg & IXGBE_VT_MSGTYPE_NACK) - pr_warn("Last Request of type %2.2x to PF Nacked\n", - msg & 0xFF); - /* - * Restore the PFSTS bit in case someone is polling for a - * return message from the PF - */ - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFSTS; - } - - /* - * checking for the ack clears the PFACK bit. Place - * it back in the v2p_mailbox cache so that anyone - * polling for an ack will not miss it - */ - if (got_ack) - hw->mbx.v2p_mailbox |= IXGBE_VFMAILBOX_PFACK; + if (!test_bit(__IXGBEVF_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, adapter->eims_other); @@ -899,10 +886,10 @@ static int ixgbevf_request_msix_irqs(struct ixgbevf_adapter *adapter) } err = request_irq(adapter->msix_entries[vector].vector, - &ixgbevf_msix_mbx, 0, netdev->name, adapter); + &ixgbevf_msix_other, 0, netdev->name, adapter); if (err) { hw_dbg(&adapter->hw, - "request_irq for msix_mbx failed: %d\n", err); + "request_irq for msix_other failed: %d\n", err); goto free_queue_irqs; } @@ -1057,15 +1044,46 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; - if (rx_ring->rx_buf_len == MAXIMUM_ETHERNET_VLAN_SIZE) - srrctl |= IXGBEVF_RXBUFFER_2048 >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= rx_ring->rx_buf_len >> - IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ALIGN(rx_ring->rx_buf_len, 1024) >> + IXGBE_SRRCTL_BSIZEPKT_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + int i; + u16 rx_buf_len; + + /* notify the PF of our intent to use this size of frame */ + ixgbevf_rlpml_set_vf(hw, max_frame); + + /* PF will allow an extra 4 bytes past for vlan tagged frames */ + max_frame += VLAN_HLEN; + + /* + * Make best use of allocation by using all but 1K of a + * power of 2 allocation that will be used for skb->head. + */ + if ((hw->mac.type == ixgbe_mac_X540_vf) && + (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)) + rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; + else if (max_frame <= IXGBEVF_RXBUFFER_3K) + rx_buf_len = IXGBEVF_RXBUFFER_3K; + else if (max_frame <= IXGBEVF_RXBUFFER_7K) + rx_buf_len = IXGBEVF_RXBUFFER_7K; + else if (max_frame <= IXGBEVF_RXBUFFER_15K) + rx_buf_len = IXGBEVF_RXBUFFER_15K; + else + rx_buf_len = IXGBEVF_MAX_RXBUFFER; + + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i].rx_buf_len = rx_buf_len; +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1076,18 +1094,14 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) { u64 rdba; struct ixgbe_hw *hw = &adapter->hw; - struct net_device *netdev = adapter->netdev; - int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; int i, j; u32 rdlen; - int rx_buf_len; /* PSRTYPE must be initialized in 82599 */ IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, 0); - if (netdev->mtu <= ETH_DATA_LEN) - rx_buf_len = MAXIMUM_ETHERNET_VLAN_SIZE; - else - rx_buf_len = ALIGN(max_frame, 1024); + + /* set_rx_buffer_len must be called before ring initialization */ + ixgbevf_set_rx_buffer_len(adapter); rdlen = adapter->rx_ring[0].count * sizeof(union ixgbe_adv_rx_desc); /* Setup the HW Rx Head and Tail Descriptor Pointers and @@ -1103,7 +1117,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFRDT(j), 0); adapter->rx_ring[i].head = IXGBE_VFRDH(j); adapter->rx_ring[i].tail = IXGBE_VFRDT(j); - adapter->rx_ring[i].rx_buf_len = rx_buf_len; ixgbevf_configure_srrctl(adapter, j); } @@ -1113,36 +1126,47 @@ static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err; + + if (!hw->mac.ops.set_vfta) + return -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* add VID to filter table */ - if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, true); + err = hw->mac.ops.set_vfta(hw, vid, 0, true); spin_unlock(&adapter->mbx_lock); + /* translate error return types so error makes sense */ + if (err == IXGBE_ERR_MBX) + return -EIO; + + if (err == IXGBE_ERR_INVALID_ARGUMENT) + return -EACCES; + set_bit(vid, adapter->active_vlans); - return 0; + return err; } static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbe_hw *hw = &adapter->hw; + int err = -EOPNOTSUPP; spin_lock(&adapter->mbx_lock); /* remove VID from filter table */ if (hw->mac.ops.set_vfta) - hw->mac.ops.set_vfta(hw, vid, 0, false); + err = hw->mac.ops.set_vfta(hw, vid, 0, false); spin_unlock(&adapter->mbx_lock); clear_bit(vid, adapter->active_vlans); - return 0; + return err; } static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter) @@ -1308,6 +1332,25 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter) adapter->stats.base_vfmprc = adapter->stats.last_vfmprc; } +static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int api[] = { ixgbe_mbox_api_10, + ixgbe_mbox_api_unknown }; + int err = 0, idx = 0; + + spin_lock(&adapter->mbx_lock); + + while (api[idx] != ixgbe_mbox_api_unknown) { + err = ixgbevf_negotiate_api_version(hw, api[idx]); + if (!err) + break; + idx++; + } + + spin_unlock(&adapter->mbx_lock); +} + static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1315,7 +1358,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) int i, j = 0; int num_rx_rings = adapter->num_rx_queues; u32 txdctl, rxdctl; - u32 msg[2]; for (i = 0; i < adapter->num_tx_queues; i++) { j = adapter->tx_ring[i].reg_idx; @@ -1356,10 +1398,6 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) hw->mac.ops.set_rar(hw, 0, hw->mac.perm_addr, 0); } - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - hw->mbx.ops.write_posted(hw, msg, 2); - spin_unlock(&adapter->mbx_lock); clear_bit(__IXGBEVF_DOWN, &adapter->state); @@ -1371,6 +1409,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) ixgbevf_save_reset_stats(adapter); ixgbevf_init_last_counter_stats(adapter); + hw->mac.get_link_status = 1; mod_timer(&adapter->watchdog_timer, jiffies); } @@ -1378,6 +1417,8 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; + ixgbevf_negotiate_api(adapter); + ixgbevf_configure(adapter); ixgbevf_up_complete(adapter); @@ -1419,7 +1460,7 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_adapter *adapter, rx_buffer_info->skb = NULL; do { struct sk_buff *this = skb; - skb = skb->prev; + skb = IXGBE_CB(skb)->prev; dev_kfree_skb(this); } while (skb); } @@ -1547,8 +1588,6 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; - WARN_ON(in_interrupt()); while (test_and_set_bit(__IXGBEVF_RESETTING, &adapter->state)) @@ -1561,10 +1600,8 @@ void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter) * watchdog task will continue to schedule reset tasks until * the PF is up and running. */ - if (!hw->mac.ops.reset_hw(hw)) { - ixgbevf_down(adapter); - ixgbevf_up(adapter); - } + ixgbevf_down(adapter); + ixgbevf_up(adapter); clear_bit(__IXGBEVF_RESETTING, &adapter->state); } @@ -1867,6 +1904,22 @@ err_set_interrupt: } /** + * ixgbevf_clear_interrupt_scheme - Clear the current interrupt scheme settings + * @adapter: board private structure to clear interrupt scheme on + * + * We go through and clear interrupt specific resources and reset the structure + * to pre-load conditions + **/ +static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) +{ + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + + ixgbevf_free_q_vectors(adapter); + ixgbevf_reset_interrupt_capability(adapter); +} + +/** * ixgbevf_sw_init - Initialize general software structures * (struct ixgbevf_adapter) * @adapter: board private structure to initialize @@ -2351,6 +2404,8 @@ static int ixgbevf_open(struct net_device *netdev) } } + ixgbevf_negotiate_api(adapter); + /* allocate transmit descriptors */ err = ixgbevf_setup_all_tx_resources(adapter); if (err) @@ -2860,10 +2915,8 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN; int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE; - u32 msg[2]; if (adapter->hw.mac.type == ixgbe_mac_X540_vf) max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE; @@ -2877,35 +2930,91 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - if (!netif_running(netdev)) { - msg[0] = IXGBE_VF_SET_LPE; - msg[1] = max_frame; - hw->mbx.ops.write_posted(hw, msg, 2); - } - if (netif_running(netdev)) ixgbevf_reinit_locked(adapter); return 0; } -static void ixgbevf_shutdown(struct pci_dev *pdev) +static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbevf_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM + int retval = 0; +#endif netif_device_detach(netdev); if (netif_running(netdev)) { + rtnl_lock(); ixgbevf_down(adapter); ixgbevf_free_irq(adapter); ixgbevf_free_all_tx_resources(adapter); ixgbevf_free_all_rx_resources(adapter); + rtnl_unlock(); } - pci_save_state(pdev); + ixgbevf_clear_interrupt_scheme(adapter); +#ifdef CONFIG_PM + retval = pci_save_state(pdev); + if (retval) + return retval; + +#endif pci_disable_device(pdev); + + return 0; +} + +#ifdef CONFIG_PM +static int ixgbevf_resume(struct pci_dev *pdev) +{ + struct ixgbevf_adapter *adapter = pci_get_drvdata(pdev); + struct net_device *netdev = adapter->netdev; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + /* + * pci_restore_state clears dev->state_saved so call + * pci_save_state to restore it. + */ + pci_save_state(pdev); + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + rtnl_lock(); + err = ixgbevf_init_interrupt_scheme(adapter); + rtnl_unlock(); + if (err) { + dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + return err; + } + + ixgbevf_reset(adapter); + + if (netif_running(netdev)) { + err = ixgbevf_open(netdev); + if (err) + return err; + } + + netif_device_attach(netdev); + + return err; +} + +#endif /* CONFIG_PM */ +static void ixgbevf_shutdown(struct pci_dev *pdev) +{ + ixgbevf_suspend(pdev, PMSG_SUSPEND); } static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, @@ -2946,7 +3055,7 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev, return stats; } -static const struct net_device_ops ixgbe_netdev_ops = { +static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_open = ixgbevf_open, .ndo_stop = ixgbevf_close, .ndo_start_xmit = ixgbevf_xmit_frame, @@ -2962,7 +3071,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { static void ixgbevf_assign_netdev_ops(struct net_device *dev) { - dev->netdev_ops = &ixgbe_netdev_ops; + dev->netdev_ops = &ixgbevf_netdev_ops; ixgbevf_set_ethtool_ops(dev); dev->watchdog_timeo = 5 * HZ; } @@ -3131,6 +3240,7 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, return 0; err_register: + ixgbevf_clear_interrupt_scheme(adapter); err_sw_init: ixgbevf_reset_interrupt_capability(adapter); iounmap(hw->hw_addr); @@ -3168,6 +3278,7 @@ static void __devexit ixgbevf_remove(struct pci_dev *pdev) if (netdev->reg_state == NETREG_REGISTERED) unregister_netdev(netdev); + ixgbevf_clear_interrupt_scheme(adapter); ixgbevf_reset_interrupt_capability(adapter); iounmap(adapter->hw.hw_addr); @@ -3256,7 +3367,7 @@ static void ixgbevf_io_resume(struct pci_dev *pdev) } /* PCI Error Recovery (ERS) */ -static struct pci_error_handlers ixgbevf_err_handler = { +static const struct pci_error_handlers ixgbevf_err_handler = { .error_detected = ixgbevf_io_error_detected, .slot_reset = ixgbevf_io_slot_reset, .resume = ixgbevf_io_resume, @@ -3267,6 +3378,11 @@ static struct pci_driver ixgbevf_driver = { .id_table = ixgbevf_pci_tbl, .probe = ixgbevf_probe, .remove = __devexit_p(ixgbevf_remove), +#ifdef CONFIG_PM + /* Power Management Hooks */ + .suspend = ixgbevf_suspend, + .resume = ixgbevf_resume, +#endif .shutdown = ixgbevf_shutdown, .err_handler = &ixgbevf_err_handler }; diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 9c955900fe6..d5028ddf4b3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -86,14 +86,17 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw) static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = IXGBE_ERR_MBX; + s32 ret_val = -IXGBE_ERR_MBX; + + if (!mbx->ops.read) + goto out; ret_val = ixgbevf_poll_for_msg(hw); /* if ack received read message, otherwise we timed out */ if (!ret_val) ret_val = mbx->ops.read(hw, msg, size); - +out: return ret_val; } @@ -109,7 +112,11 @@ static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val; + s32 ret_val = -IXGBE_ERR_MBX; + + /* exit if either we can't write or there isn't a defined timeout */ + if (!mbx->ops.write || !mbx->timeout) + goto out; /* send msg */ ret_val = mbx->ops.write(hw, msg, size); @@ -117,7 +124,7 @@ static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) /* if msg sent wait until we receive an ack */ if (!ret_val) ret_val = ixgbevf_poll_for_ack(hw); - +out: return ret_val; } diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index cf9131c5c11..946ce86f337 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -76,12 +76,29 @@ /* bits 23:16 are used for exra info for certain messages */ #define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) +/* definitions to support mailbox API version negotiation */ + +/* + * each element denotes a version of the API; existing numbers may not + * change; any additions must go at the end + */ +enum ixgbe_pfvf_api_rev { + ixgbe_mbox_api_10, /* API version 1.0, linux/freebsd VF driver */ + ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ + /* This value should always be last */ + ixgbe_mbox_api_unknown, /* indicates that API version is not known */ +}; + +/* mailbox API, legacy requests */ #define IXGBE_VF_RESET 0x01 /* VF requests reset */ #define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ #define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ #define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ -#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ -#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* mailbox API, version 1.0 VF requests */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ +#define IXGBE_VF_API_NEGOTIATE 0x08 /* negotiate API version */ /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index ec89b86f7ca..0c7447e6fcc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -79,6 +79,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) /* Call adapter stop to disable tx/rx and clear interrupts */ hw->mac.ops.stop_adapter(hw); + /* reset the api version */ + hw->api_version = ixgbe_mbox_api_10; + IXGBE_WRITE_REG(hw, IXGBE_VFCTRL, IXGBE_CTRL_RST); IXGBE_WRITE_FLUSH(hw); @@ -97,7 +100,7 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) msgbuf[0] = IXGBE_VF_RESET; mbx->ops.write_posted(hw, msgbuf, 1); - msleep(10); + mdelay(10); /* set our "perm_addr" based on info provided by PF */ /* also set up the mc_filter_type which is piggy backed @@ -346,16 +349,32 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) { + struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[2]; + s32 err; msgbuf[0] = IXGBE_VF_SET_VLAN; msgbuf[1] = vlan; /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - ixgbevf_write_msg_read_ack(hw, msgbuf, 2); + err = mbx->ops.write_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; - return 0; + err = mbx->ops.read_posted(hw, msgbuf, 2); + if (err) + goto mbx_err; + + /* remove extra bits from the message */ + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; + msgbuf[0] &= ~(0xFF << IXGBE_VT_MSGINFO_SHIFT); + + if (msgbuf[0] != (IXGBE_VF_SET_VLAN | IXGBE_VT_MSGTYPE_ACK)) + err = IXGBE_ERR_INVALID_ARGUMENT; + +mbx_err: + return err; } /** @@ -389,20 +408,23 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, bool *link_up, bool autoneg_wait_to_complete) { + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val = 0; u32 links_reg; + u32 in_msg = 0; - if (!(hw->mbx.ops.check_for_rst(hw))) { - *link_up = false; - *speed = 0; - return -1; - } + /* If we were hit with a reset drop the link */ + if (!mbx->ops.check_for_rst(hw) || !mbx->timeout) + mac->get_link_status = true; - links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!mac->get_link_status) + goto out; - if (links_reg & IXGBE_LINKS_UP) - *link_up = true; - else - *link_up = false; + /* if link status is down no point in checking to see if pf is up */ + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; switch (links_reg & IXGBE_LINKS_SPEED_82599) { case IXGBE_LINKS_SPEED_10G_82599: @@ -416,7 +438,79 @@ static s32 ixgbevf_check_mac_link_vf(struct ixgbe_hw *hw, break; } - return 0; + /* if the read failed it could just be a mailbox collision, best wait + * until we are called again and don't report an error */ + if (mbx->ops.read(hw, &in_msg, 1)) + goto out; + + if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) { + /* msg is not CTS and is NACK we must have lost CTS status */ + if (in_msg & IXGBE_VT_MSGTYPE_NACK) + ret_val = -1; + goto out; + } + + /* the pf is talking, if we timed out in the past we reinit */ + if (!mbx->timeout) { + ret_val = -1; + goto out; + } + + /* if we passed all the tests above then the link is up and we no + * longer need to check for link */ + mac->get_link_status = false; + +out: + *link_up = !mac->get_link_status; + return ret_val; +} + +/** + * ixgbevf_rlpml_set_vf - Set the maximum receive packet length + * @hw: pointer to the HW structure + * @max_size: value to assign to max frame size + **/ +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) +{ + u32 msgbuf[2]; + + msgbuf[0] = IXGBE_VF_SET_LPE; + msgbuf[1] = max_size; + ixgbevf_write_msg_read_ack(hw, msgbuf, 2); +} + +/** + * ixgbevf_negotiate_api_version - Negotiate supported API version + * @hw: pointer to the HW structure + * @api: integer containing requested API version + **/ +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) +{ + int err; + u32 msg[3]; + + /* Negotiate the mailbox API version */ + msg[0] = IXGBE_VF_API_NEGOTIATE; + msg[1] = api; + msg[2] = 0; + err = hw->mbx.ops.write_posted(hw, msg, 3); + + if (!err) + err = hw->mbx.ops.read_posted(hw, msg, 3); + + if (!err) { + msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; + + /* Store value and return 0 on success */ + if (msg[0] == (IXGBE_VF_API_NEGOTIATE | IXGBE_VT_MSGTYPE_ACK)) { + hw->api_version = api; + return 0; + } + + err = IXGBE_ERR_INVALID_ARGUMENT; + } + + return err; } static const struct ixgbe_mac_operations ixgbevf_mac_ops = { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index 25c951daee5..47f11a584d8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -137,6 +137,8 @@ struct ixgbe_hw { u8 revision_id; bool adapter_stopped; + + int api_version; }; struct ixgbevf_hw_stats { @@ -170,5 +172,7 @@ struct ixgbevf_info { const struct ixgbe_mac_operations *mac_ops; }; +void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); +int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); #endif /* __IXGBE_VF_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c8fef435302..3d1899ff107 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -40,6 +40,7 @@ #include <linux/mlx4/cmd.h> #include <linux/semaphore.h> +#include <rdma/ib_smi.h> #include <asm/io.h> @@ -394,7 +395,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr; int ret; - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); + vhcr->in_param = cpu_to_be64(in_param); vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0; vhcr->in_modifier = cpu_to_be32(in_modifier); @@ -402,6 +404,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, vhcr->token = cpu_to_be16(CMD_POLL_TOKEN); vhcr->status = 0; vhcr->flags = !!(priv->cmd.use_events) << 6; + if (mlx4_is_master(dev)) { ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr); if (!ret) { @@ -438,7 +441,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, mlx4_err(dev, "failed execution of VHCR_POST command" "opcode 0x%x\n", op); } - up(&priv->cmd.slave_sem); + + mutex_unlock(&priv->cmd.slave_cmd_mutex); return ret; } @@ -627,6 +631,162 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } +static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox) +{ + struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf); + struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf); + int err; + int i; + + if (index & 0x1f) + return -EINVAL; + + in_mad->attr_mod = cpu_to_be32(index / 32); + + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (err) + return err; + + for (i = 0; i < 32; ++i) + pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]); + + return err; +} + +static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox) +{ + int i; + int err; + + for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) { + err = query_pkey_block(dev, port, i, table + i, inbox, outbox); + if (err) + return err; + } + + return 0; +} +#define PORT_CAPABILITY_LOCATION_IN_SMP 20 +#define PORT_STATE_OFFSET 32 + +static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf) +{ + if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP) + return IB_PORT_ACTIVE; + else + return IB_PORT_DOWN; +} + +static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct ib_smp *smp = inbox->buf; + u32 index; + u8 port; + u16 *table; + int err; + int vidx, pidx; + struct mlx4_priv *priv = mlx4_priv(dev); + struct ib_smp *outsmp = outbox->buf; + __be16 *outtab = (__be16 *)(outsmp->data); + __be32 slave_cap_mask; + __be64 slave_node_guid; + port = vhcr->in_modifier; + + if (smp->base_version == 1 && + smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->class_version == 1) { + if (smp->method == IB_MGMT_METHOD_GET) { + if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) { + index = be32_to_cpu(smp->attr_mod); + if (port < 1 || port > dev->caps.num_ports) + return -EINVAL; + table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL); + if (!table) + return -ENOMEM; + /* need to get the full pkey table because the paravirtualized + * pkeys may be scattered among several pkey blocks. + */ + err = get_full_pkey_table(dev, port, table, inbox, outbox); + if (!err) { + for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) { + pidx = priv->virt2phys_pkey[slave][port - 1][vidx]; + outtab[vidx % 32] = cpu_to_be16(table[pidx]); + } + } + kfree(table); + return err; + } + if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) { + /*get the slave specific caps:*/ + /*do the command */ + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + /* modify the response for slaves */ + if (!err && slave != mlx4_master_func_num(dev)) { + u8 *state = outsmp->data + PORT_STATE_OFFSET; + + *state = (*state & 0xf0) | vf_port_state(dev, port, slave); + slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; + memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4); + } + return err; + } + if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) { + /* compute slave's gid block */ + smp->attr_mod = cpu_to_be32(slave / 8); + /* execute cmd */ + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + /* if needed, move slave gid to index 0 */ + if (slave % 8) + memcpy(outsmp->data, + outsmp->data + (slave % 8) * 8, 8); + /* delete all other gids */ + memset(outsmp->data + 8, 0, 56); + } + return err; + } + if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + slave_node_guid = mlx4_get_slave_node_guid(dev, slave); + memcpy(outsmp->data + 12, &slave_node_guid, 8); + } + return err; + } + } + } + if (slave != mlx4_master_func_num(dev) && + ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) || + (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED && + smp->method == IB_MGMT_METHOD_SET))) { + mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, " + "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n", + slave, smp->method, smp->mgmt_class, + be16_to_cpu(smp->attr_id)); + return -EPERM; + } + /*default:*/ + return mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); +} + int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -950,7 +1110,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_INIT2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, @@ -968,7 +1128,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, @@ -977,7 +1137,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTS2RTS_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, @@ -986,7 +1146,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQERR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, @@ -1013,7 +1173,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2SQD_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, @@ -1022,7 +1182,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, @@ -1061,6 +1221,24 @@ static struct mlx4_cmd_info cmd_info[] = { .wrapper = mlx4_GEN_QP_wrapper }, { + .opcode = MLX4_CMD_CONF_SPECIAL_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, /* XXX verify: only demux can do this */ + .wrapper = NULL + }, + { + .opcode = MLX4_CMD_MAD_IFC, + .has_inbox = true, + .has_outbox = true, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_MAD_IFC_wrapper + }, + { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, .has_outbox = true, @@ -1340,6 +1518,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); + /* write the version in the event field */ reply |= mlx4_comm_get_version(); @@ -1376,19 +1556,21 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma |= param; slave_state[slave].active = true; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) goto reset_slave; - down(&priv->cmd.slave_sem); + + mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { mlx4_err(dev, "Failed processing vhcr for slave:%d," " resetting slave.\n", slave); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); goto reset_slave; } - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); break; default: mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave); @@ -1529,14 +1711,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) struct mlx4_slave_state *s_state; int i, j, err, port; - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, - &priv->mfunc.vhcr_dma, - GFP_KERNEL); - if (!priv->mfunc.vhcr) { - mlx4_err(dev, "Couldn't allocate vhcr.\n"); - return -ENOMEM; - } - if (mlx4_is_master(dev)) priv->mfunc.comm = ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + @@ -1590,6 +1764,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) INIT_WORK(&priv->mfunc.master.slave_flr_event_work, mlx4_master_handle_slave_flr); spin_lock_init(&priv->mfunc.master.slave_state_lock); + spin_lock_init(&priv->mfunc.master.slave_eq.event_lock); priv->mfunc.master.comm_wq = create_singlethread_workqueue("mlx4_comm"); if (!priv->mfunc.master.comm_wq) @@ -1598,7 +1773,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - sema_init(&priv->cmd.slave_sem, 1); err = mlx4_ARM_COMM_CHANNEL(dev); if (err) { mlx4_err(dev, " Failed to arm comm channel eq: %x\n", @@ -1612,8 +1786,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) mlx4_err(dev, "Couldn't sync toggles\n"); goto err_comm; } - - sema_init(&priv->cmd.slave_sem, 1); } return 0; @@ -1643,6 +1815,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_init(&priv->cmd.hcr_mutex); + mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; priv->cmd.toggle = 1; @@ -1659,14 +1832,30 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } } + if (mlx4_is_mfunc(dev)) { + priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + &priv->mfunc.vhcr_dma, + GFP_KERNEL); + if (!priv->mfunc.vhcr) { + mlx4_err(dev, "Couldn't allocate VHCR.\n"); + goto err_hcr; + } + } + priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) - goto err_hcr; + goto err_vhcr; return 0; +err_vhcr: + if (mlx4_is_mfunc(dev)) + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; + err_hcr: if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); @@ -1689,9 +1878,6 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) } iounmap(priv->mfunc.comm); - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, priv->mfunc.vhcr_dma); - priv->mfunc.vhcr = NULL; } void mlx4_cmd_cleanup(struct mlx4_dev *dev) @@ -1702,6 +1888,10 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev) if (!mlx4_is_slave(dev)) iounmap(priv->cmd.hcr); + if (mlx4_is_mfunc(dev)) + dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr, priv->mfunc.vhcr_dma); + priv->mfunc.vhcr = NULL; } /* diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 10bba09c44e..c10e3a6de09 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -712,10 +712,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (bounce) tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); - /* Run destructor before passing skb to HW */ - if (likely(!skb_shared(skb))) - skb_orphan(skb); - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tag) { *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); op_own |= htonl((bf_index & 0xffff) << 8); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 99a04648fab..51c764901ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -164,13 +164,16 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq; - struct mlx4_eqe *s_eqe = - &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; + struct mlx4_eqe *s_eqe; + unsigned long flags; + spin_lock_irqsave(&slave_eq->event_lock, flags); + s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)]; if ((!!(s_eqe->owner & 0x80)) ^ (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) { mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. " "No free EQE on slave events queue\n", slave); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); return; } @@ -183,6 +186,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe) queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_event_work); + spin_unlock_irqrestore(&slave_eq->event_lock, flags); } static void mlx4_slave_event(struct mlx4_dev *dev, int slave, @@ -200,6 +204,196 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, slave_event(dev, slave, eqe); } +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; + + if (!s_slave->active) + return 0; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_pkey_eqe); + +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); + +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, + u8 port_subtype_change) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; + eqe.subtype = port_subtype_change; + eqe.event.port_change.port = cpu_to_be32(port << 28); + + mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, + port_subtype_change, slave, port); + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); + +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return SLAVE_PORT_DOWN; + } + return s_state[slave].port_state[port]; +} +EXPORT_SYMBOL(mlx4_get_slave_port_state); + +static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, + enum slave_port_state state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return -1; + } + s_state[slave].port_state[port] = state; + + return 0; +} + +static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) +{ + int i; + enum slave_port_gen_event gen_event; + + for (i = 0; i < dev->num_slaves; i++) + set_and_calc_slave_port_state(dev, i, port, event, &gen_event); +} +/************************************************************************** + The function get as input the new event to that port, + and according to the prev state change the slave's port state. + The events are: + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP + MLX4_PORT_STATE_IB_EVENT_GID_VALID + MLX4_PORT_STATE_IB_EVENT_GID_INVALID +***************************************************************************/ +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, + u8 port, int event, + enum slave_port_gen_event *gen_event) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *ctx = NULL; + unsigned long flags; + int ret = -1; + enum slave_port_state cur_state = + mlx4_get_slave_port_state(dev, slave, port); + + *gen_event = SLAVE_PORT_GEN_EVENT_NONE; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return ret; + } + + ctx = &priv->mfunc.master.slave_state[slave]; + spin_lock_irqsave(&ctx->lock, flags); + + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n", + __func__, slave, cur_state, event); + + switch (cur_state) { + case SLAVE_PORT_DOWN: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + break; + case SLAVE_PENDING_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_UP; + } + break; + case SLAVE_PORT_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == + event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } + break; + default: + pr_err("%s: BUG!!! UNKNOWN state: " + "slave:%d, port:%d\n", __func__, slave, port); + goto out; + } + ret = mlx4_get_slave_port_state(dev, slave, port); + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event" + " :%d gen_event: %d\n", + __func__, slave, cur_state, event, *gen_event); + +out: + spin_unlock_irqrestore(&ctx->lock, flags); + return ret; +} + +EXPORT_SYMBOL(set_and_calc_slave_port_state); + +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) +{ + struct mlx4_eqe eqe; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; + eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.params.port_info.changed_attr = + cpu_to_be32((u32) attr); + + slave_event(dev, ALL_SLAVES, &eqe); + return 0; +} +EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); + void mlx4_master_handle_slave_flr(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = @@ -251,6 +445,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u32 flr_slave; u8 update_slave_state; int i; + enum slave_port_gen_event gen_event; while ((eqe = next_eqe_sw(eq))) { /* @@ -347,35 +542,49 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) case MLX4_EVENT_TYPE_PORT_CHANGE: port = be32_to_cpu(eqe->event.port_change.port) >> 28; if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_DOWN, + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; - if (mlx4_is_master(dev)) - /*change the state of all slave's port - * to down:*/ - for (i = 0; i < dev->num_slaves; i++) { - mlx4_dbg(dev, "%s: Sending " - "MLX4_PORT_CHANGE_SUBTYPE_DOWN" + if (!mlx4_is_master(dev)) + break; + for (i = 0; i < dev->num_slaves; i++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - if (i == dev->caps.function) - continue; mlx4_slave_event(dev, i, eqe); + } else { /* IB port */ + set_and_calc_slave_port_state(dev, i, port, + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + &gen_event); + /*we can be in pending state, then do not send port_down event*/ + if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_slave_event(dev, i, eqe); + } } + } } else { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_UP, - port); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); + mlx4_priv(dev)->sense.do_sense_port[port] = 0; - if (mlx4_is_master(dev)) { + if (!mlx4_is_master(dev)) + break; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) for (i = 0; i < dev->num_slaves; i++) { - if (i == dev->caps.function) + if (i == mlx4_master_func_num(dev)) continue; mlx4_slave_event(dev, i, eqe); } - } + else /* IB port */ + /* port-up event will be sent to a slave when the + * slave's alias-guid is set. This is done in alias_GUID.c + */ + set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c6964848732..4f30b99324c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -183,7 +183,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24 #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c -#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 +#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -194,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc +#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 +#define QUERY_FUNC_CAP_QP0_PROXY 0x14 +#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 +#define QUERY_FUNC_CAP_QP1_PROXY 0x1c + #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { - field = vhcr->in_modifier; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = 0; /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + field = vhcr->in_modifier; /* phys-port = logical-port */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + + /* size is now the QP number */ + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); + + size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); @@ -253,99 +271,118 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; - u8 field; + u8 field, op_modifier; u32 size; - int i; int err = 0; + op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP, + err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); - if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { - mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); - err = -EPROTONOSUPPORT; - goto out; - } - func_cap->flags = field; + if (!op_modifier) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); + if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); + err = -EPROTONOSUPPORT; + goto out; + } + func_cap->flags = field; + + MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + func_cap->num_ports = field; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - func_cap->num_ports = field; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); - func_cap->pf_context_behaviour = size; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + func_cap->max_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - func_cap->max_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - func_cap->reserved_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + goto out; + } - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; + /* logical port query */ + if (gen_or_port > dev->caps.num_ports) { + err = -EINVAL; + goto out; + } - for (i = 1; i <= func_cap->num_ports; ++i) { - err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, - MLX4_CMD_QUERY_FUNC_CAP, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); - if (err) + if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; goto out; + } - if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { - mlx4_err(dev, "VLAN is enforced on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { - mlx4_err(dev, "Force mac is enabled on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - } else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { - mlx4_err(dev, "phy_wqe_gid is " - "enforced on this ib port\n"); - err = -EPROTONOSUPPORT; - goto out; - } + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { + mlx4_err(dev, "Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; } + } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is " + "enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } - MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - func_cap->physical_port[i] = field; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + func_cap->physical_port = field; + if (func_cap->physical_port != gen_or_port) { + err = -ENOSYS; + goto out; } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); + func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); + func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); + func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); + func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index @@ -559,7 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_pds = 1 << (field & 0x3f); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET); dev_cap->reserved_xrcds = field >> 4; - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET); + MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET); dev_cap->max_xrcds = 1 << (field & 0x1f); MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET); @@ -715,6 +752,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + u64 flags; int err = 0; u8 field; @@ -723,6 +761,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; + /* add port mng change event capability unconditionally to slaves */ + MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; + MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); + /* For guests, report Blueflame disabled */ MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET); field &= 0x7f; @@ -1345,6 +1388,19 @@ out: return err; } +/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0 + * and real QP0 are active, so that the paravirtualized QP0 is ready + * to operate */ +static int check_qp0_state(struct mlx4_dev *dev, int function, int port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + /* irrelevant if not infiniband */ + if (priv->mfunc.master.qp0_state[port].proxy_qp0_active && + priv->mfunc.master.qp0_state[port].qp0_active) + return 1; + return 0; +} + int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1358,17 +1414,29 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - - /* Enable port only if it was previously disabled */ - if (!priv->mfunc.master.init_port_ref[port]) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); - if (err) - return err; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + /* Enable port only if it was previously disabled */ + if (!priv->mfunc.master.init_port_ref[port]) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } else { + if (slave == mlx4_master_func_num(dev)) { + if (check_qp0_state(dev, slave, port) && + !priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.qp0_state[port].port_active = 1; + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); } - priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port); ++priv->mfunc.master.init_port_ref[port]; return 0; } @@ -1441,15 +1509,29 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, (1 << port))) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - if (priv->mfunc.master.init_port_ref[port] == 1) { - err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, - MLX4_CMD_NATIVE); - if (err) - return err; + if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { + if (priv->mfunc.master.init_port_ref[port] == 1) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + 1000, MLX4_CMD_NATIVE); + if (err) + return err; + } + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + } else { + /* infiniband port */ + if (slave == mlx4_master_func_num(dev)) { + if (!priv->mfunc.master.qp0_state[port].qp0_active && + priv->mfunc.master.qp0_state[port].port_active) { + err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + 1000, MLX4_CMD_NATIVE); + if (err) + return err; + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); + priv->mfunc.master.qp0_state[port].port_active = 0; + } + } else + priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); } - priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); --priv->mfunc.master.init_port_ref[port]; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 83fcbbf1b16..85abe9c11a2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,8 +134,12 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u8 physical_port[MLX4_MAX_PORTS + 1]; - u8 port_flags[MLX4_MAX_PORTS + 1]; + u32 qp0_tunnel_qpn; + u32 qp0_proxy_qpn; + u32 qp1_tunnel_qpn; + u32 qp1_proxy_qpn; + u8 physical_port; + u8 port_flags; }; struct mlx4_adapter { @@ -192,7 +196,8 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f816c6aed7..80df2ab0177 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -95,8 +95,6 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num" " Not in use with device managed" " flow steering"); -#define MLX4_VF (1 << 0) - #define HCA_GLOBAL_CAP_MASK 0 #define PF_CONTEXT_BEHAVIOUR_MASK 0 @@ -299,9 +297,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) mlx4_dbg(dev, "Steering mode is: %s\n", mlx4_steering_mode_str(dev->caps.steering_mode)); - /* Sense port always allowed on supported devices for ConnectX1 and 2 */ - if (dev->pdev->device != 0x1003) + /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ + if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; + /* Don't do sense port on multifunction devices (for now at least) */ + if (mlx4_is_mfunc(dev)) + dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; dev->caps.log_num_macs = log_num_mac; dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS; @@ -384,6 +385,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; + dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -409,20 +411,54 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; - if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || - qpn < dev->caps.sqp_start) + + if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; - if (qpn >= dev->caps.base_tunnel_sqpn) + if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ - qk += qpn - dev->caps.base_tunnel_sqpn; + qk += qpn - dev->phys_caps.base_tunnel_sqpn; else - qk += qpn - dev->caps.sqp_start; + qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } EXPORT_SYMBOL(mlx4_get_parav_qkey); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->virt2phys_pkey[slave][port - 1][i] = val; +} +EXPORT_SYMBOL(mlx4_sync_pkey_table); + +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->slave_node_guids[slave] = guid; +} +EXPORT_SYMBOL(mlx4_put_slave_node_guid); + +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return 0; + + return priv->slave_node_guids[slave]; +} +EXPORT_SYMBOL(mlx4_get_slave_node_guid); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -493,9 +529,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n"); + mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", + err); return err; } @@ -523,12 +560,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + for (i = 1; i <= dev->caps.num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" + " port %d, aborting (%d).\n", i, err); + goto err_mem; + } + dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; + dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; + dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; + dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i])) - return -ENODEV; + goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -538,10 +596,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); - return -ENODEV; + goto err_mem; } return 0; + +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + + return err; } /* @@ -1092,10 +1160,10 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function.\n"); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); } static int map_bf_area(struct mlx4_dev *dev) @@ -1147,7 +1215,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; - down(&priv->cmd.slave_sem); + mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, @@ -1196,12 +1264,13 @@ static int mlx4_init_slave(struct mlx4_dev *dev) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) goto err; - up(&priv->cmd.slave_sem); + + mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); - up(&priv->cmd.slave_sem); + mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -1848,7 +1917,7 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) iounmap(owner); } -static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) +static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) { struct mlx4_priv *priv; struct mlx4_dev *dev; @@ -1871,12 +1940,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) /* * Check for BARs. */ - if (((id == NULL) || !(id->driver_data & MLX4_VF)) && + if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) && !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { dev_err(&pdev->dev, "Missing DCS, aborting." - "(id == 0X%p, id->driver_data: 0x%lx," - " pci_resource_flags(pdev, 0):0x%lx)\n", id, - id ? id->driver_data : 0, pci_resource_flags(pdev, 0)); + "(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n", + pci_dev_data, pci_resource_flags(pdev, 0)); err = -ENODEV; goto err_disable_pdev; } @@ -1941,7 +2009,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->rev_id = pdev->revision; /* Detect if this device is a virtual function */ - if (id && id->driver_data & MLX4_VF) { + if (pci_dev_data & MLX4_PCI_DEV_IS_VF) { /* When acting as pf, we normally skip vfs unless explicitly * requested to probe them. */ if (num_vfs && extended_func_num(pdev) > probe_vf) { @@ -1969,12 +2037,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) } if (num_vfs) { - mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs); + mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); err = pci_enable_sriov(pdev, num_vfs); if (err) { - mlx4_err(dev, "Failed to enable sriov," - "continuing without sriov enabled" - " (err = %d).\n", err); + mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", + err); err = 0; } else { mlx4_warn(dev, "Running in master mode\n"); @@ -2089,6 +2156,7 @@ slave_start: mlx4_sense_init(dev); mlx4_start_sense(dev); + priv->pci_dev_data = pci_dev_data; pci_set_drvdata(pdev, dev); return 0; @@ -2158,7 +2226,7 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev, { printk_once(KERN_INFO "%s", mlx4_version); - return __mlx4_init_one(pdev, id); + return __mlx4_init_one(pdev, id->driver_data); } static void mlx4_remove_one(struct pci_dev *pdev) @@ -2217,12 +2285,18 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); if (dev->flags & MLX4_FLAG_SRIOV) { - mlx4_warn(dev, "Disabling sriov\n"); + mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); @@ -2232,41 +2306,46 @@ static void mlx4_remove_one(struct pci_dev *pdev) int mlx4_restart_one(struct pci_dev *pdev) { + struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_priv *priv = mlx4_priv(dev); + int pci_dev_data; + + pci_dev_data = priv->pci_dev_data; mlx4_remove_one(pdev); - return __mlx4_init_one(pdev, NULL); + return __mlx4_init_one(pdev, pci_dev_data); } static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = { /* MT25408 "Hermon" SDR */ - { PCI_VDEVICE(MELLANOX, 0x6340), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR */ - { PCI_VDEVICE(MELLANOX, 0x634a), 0 }, + { PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR */ - { PCI_VDEVICE(MELLANOX, 0x6354), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" DDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6732), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" QDR PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x673c), 0 }, + { PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6368), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x6750), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x6372), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */ - { PCI_VDEVICE(MELLANOX, 0x675a), 0 }, + { PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/ - { PCI_VDEVICE(MELLANOX, 0x6764), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */ - { PCI_VDEVICE(MELLANOX, 0x6746), 0 }, + { PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT26478 ConnectX2 40GigE PCIe gen2 */ - { PCI_VDEVICE(MELLANOX, 0x676e), 0 }, + { PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT }, /* MT25400 Family [ConnectX-2 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF }, + { PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF }, /* MT27500 Family [ConnectX-3] */ { PCI_VDEVICE(MELLANOX, 0x1003), 0 }, /* MT27500 Family [ConnectX-3 Virtual Function] */ - { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF }, + { PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF }, { PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */ { PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */ { PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */ @@ -2295,12 +2374,12 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - int ret = __mlx4_init_one(pdev, NULL); + int ret = __mlx4_init_one(pdev, 0); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -static struct pci_error_handlers mlx4_err_handler = { +static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index dba69d98734..1cf42036d7b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -452,6 +452,7 @@ struct mlx4_slave_state { /*initialized via the kzalloc*/ u8 is_slave_going_down; u32 cookie; + enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; }; struct slave_list { @@ -472,6 +473,7 @@ struct mlx4_slave_event_eq { u32 eqn; u32 cons; u32 prod; + spinlock_t event_lock; struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE]; }; @@ -511,9 +513,9 @@ struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; struct mutex hcr_mutex; + struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; - struct semaphore slave_sem; int max_cmds; spinlock_t context_lock; int free_head; @@ -766,6 +768,11 @@ struct _rule_hw { }; }; +enum { + MLX4_PCI_DEV_IS_VF = 1 << 0, + MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1, +}; + struct mlx4_priv { struct mlx4_dev dev; @@ -773,6 +780,8 @@ struct mlx4_priv { struct list_head ctx_list; spinlock_t ctx_lock; + int pci_dev_data; + struct list_head pgdir_list; struct mutex pgdir_mutex; @@ -807,6 +816,9 @@ struct mlx4_priv { struct io_mapping *bf_mapping; int reserved_mtts; int fs_hash_mode; + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + __be64 slave_node_guids[MLX4_MFUNC_MAX]; + }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -1011,16 +1023,61 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index e36dd0f2fa7..4c51b05efa2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -732,6 +732,16 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, new_cap_mask = ((__be32 *) inbox->buf)[1]; } + /* slave may not set the IS_SM capability for the port */ + if (slave != mlx4_master_func_num(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM)) + return -EINVAL; + + /* No DEV_MGMT in multifunc mode */ + if (mlx4_is_mfunc(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP)) + return -EINVAL; + agg_cap_mask = 0; slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index fb2b36759cb..81e2abe07bb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -67,10 +67,18 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) complete(&qp->free); } -static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp) +/* used for INIT/CLOSE port logic */ +static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) { - return qp->qpn >= dev->caps.sqp_start && - qp->qpn <= dev->caps.sqp_start + 1; + /* this procedure is called after we already know we are on the master */ + /* qp0 is either the proxy qp0, or the real qp0 */ + u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); + *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; + + *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && + qp->qpn <= dev->phys_caps.base_sqpn + 1; + + return *real_qp0 || *proxy_qp0; } static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, @@ -122,6 +130,8 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_cmd_mailbox *mailbox; int ret = 0; + int real_qp0 = 0; + int proxy_qp0 = 0; u8 port; if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE || @@ -133,9 +143,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && - is_qp0(dev, qp)) { + is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; - priv->mfunc.master.qp0_state[port].qp0_active = 0; + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; } return ret; } @@ -162,6 +175,23 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); + if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + port = (qp->qpn & 1) + 1; + if (cur_state != MLX4_QP_STATE_ERR && + cur_state != MLX4_QP_STATE_RST && + new_state == MLX4_QP_STATE_ERR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; + else + priv->mfunc.master.qp0_state[port].qp0_active = 0; + } else if (new_state == MLX4_QP_STATE_RTR) { + if (proxy_qp0) + priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1; + else + priv->mfunc.master.qp0_state[port].qp0_active = 1; + } + } + mlx4_free_cmd_mailbox(dev, mailbox); return ret; } @@ -392,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; + int k; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -406,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->caps.sqp_start = + dev->phys_caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -437,13 +468,66 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) } + /* Reserve 8 real SQPs in both native and SRIOV modes. + * In addition, in SRIOV mode, reserve 8 proxy SQPs per function + * (for all PFs and VFs), and 8 corresponding tunnel QPs. + * Each proxy SQP works opposite its own tunnel QP. + * + * The QPs are arranged as follows: + * a. 8 real SQPs + * b. All the proxy SQPs (8 per function) + * c. All the tunnel QPs (8 per function) + */ + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->caps.sqp_start + 8, + (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), reserved_from_top); if (err) return err; - return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); + if (mlx4_is_mfunc(dev)) { + /* for PPF use */ + dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; + dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; + + /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, + * since the PF does not call mlx4_slave_caps */ + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + + for (k = 0; k < dev->caps.num_ports; k++) { + dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + k; + dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; + dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + } + } + + + err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); + if (err) + goto err_mem; + return 0; + +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + return err; } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index 11e7c1cb99b..dd1b5093d8b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -141,16 +141,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_DEVCTL, - devctl)) { + if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " "Device Control register, aborting.\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pci_write_config_word(dev->pdev, pcie_cap + PCI_EXP_LNKCTL, - linkctl)) { + if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express " "Link control register, aborting.\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 293c9e820c4..ba6506ff4ab 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -242,6 +242,15 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) return 0; } +enum qp_transition { + QP_TRANS_INIT2RTR, + QP_TRANS_RTR2RTS, + QP_TRANS_RTS2RTS, + QP_TRANS_SQERR2RTS, + QP_TRANS_SQD2SQD, + QP_TRANS_SQD2RTS +}; + /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -308,14 +317,41 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } } -static void update_ud_gid(struct mlx4_dev *dev, - struct mlx4_qp_context *qp_ctx, u8 slave) +static void update_pkey_index(struct mlx4_dev *dev, int slave, + struct mlx4_cmd_mailbox *inbox) { - u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + u8 sched = *(u8 *)(inbox->buf + 64); + u8 orig_index = *(u8 *)(inbox->buf + 35); + u8 new_index; + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + port = (sched >> 6 & 1) + 1; + + new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; + *(u8 *)(inbox->buf + 35) = new_index; + + mlx4_dbg(dev, "port = %d, orig pkey index = %d, " + "new pkey index = %d\n", port, orig_index, new_index); +} + +static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, + u8 slave) +{ + struct mlx4_qp_context *qp_ctx = inbox->buf + 8; + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); + u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; if (MLX4_QP_ST_UD == ts) qp_ctx->pri_path.mgid_index = 0x80 | slave; + if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + qp_ctx->pri_path.mgid_index = slave & 0x7F; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", slave, qp_ctx->pri_path.mgid_index); } @@ -360,8 +396,6 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id, r->from_state = r->state; r->state = RES_ANY_BUSY; - mlx4_dbg(dev, "res %s id 0x%llx to busy\n", - ResourceType(type), r->res_id); if (res) *((struct res_common **)res) = r; @@ -1105,7 +1139,13 @@ static void res_end_move(struct mlx4_dev *dev, int slave, static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { - return mlx4_is_qp_reserved(dev, qpn); + return mlx4_is_qp_reserved(dev, qpn) && + (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); +} + +static int fw_reserved(struct mlx4_dev *dev, int qpn) +{ + return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, @@ -1145,7 +1185,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) { + if (!fw_reserved(dev, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); @@ -1498,7 +1538,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) + if (!fw_reserved(dev, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); @@ -1938,6 +1978,19 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc) return be32_to_cpu(qpc->srqn) & 0x1ffffff; } +static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, + struct mlx4_qp_context *context) +{ + u32 qpn = vhcr->in_modifier & 0xffffff; + u32 qkey = 0; + + if (mlx4_get_parav_qkey(dev, qpn, &qkey)) + return; + + /* adjust qkey in qp context */ + context->qkey = cpu_to_be32(qkey); +} + int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1990,6 +2043,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, goto ex_put_scq; } + adjust_proxy_tun_qkey(dev, vhcr, qpc); + update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; @@ -2135,6 +2190,48 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, return err; } +static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *inbox, + enum qp_transition transition, u8 slave) +{ + u32 qp_type; + struct mlx4_qp_context *qp_ctx; + enum mlx4_qp_optpar optpar; + + qp_ctx = inbox->buf + 8; + qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + optpar = be32_to_cpu(*(__be32 *) inbox->buf); + + switch (qp_type) { + case MLX4_QP_ST_RC: + case MLX4_QP_ST_UC: + switch (transition) { + case QP_TRANS_INIT2RTR: + case QP_TRANS_RTR2RTS: + case QP_TRANS_RTS2RTS: + case QP_TRANS_SQD2SQD: + case QP_TRANS_SQD2RTS: + if (slave != mlx4_master_func_num(dev)) + /* slaves have only gid index 0 */ + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + if (qp_ctx->pri_path.mgid_index) + return -EINVAL; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + if (qp_ctx->alt_path.mgid_index) + return -EINVAL; + break; + default: + break; + } + + break; + default: + break; + } + + return 0; +} + int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2622,16 +2719,123 @@ out: return err; } +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int err; struct mlx4_qp_context *qpc = inbox->buf + 8; - update_ud_gid(dev, qpc, (u8)slave); + err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, qpc); + + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + + +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + if (err) + return err; + + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + if (err) + return err; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c index 34ee09bae36..094773d88f8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/sense.c +++ b/drivers/net/ethernet/mellanox/mlx4/sense.c @@ -139,5 +139,5 @@ void mlx4_sense_init(struct mlx4_dev *dev) for (port = 1; port <= dev->caps.num_ports; port++) sense->do_sense_port[port] = 1; - INIT_DELAYED_WORK_DEFERRABLE(&sense->sense_poll, mlx4_sense_port); + INIT_DEFERRABLE_WORK(&sense->sense_poll, mlx4_sense_port); } diff --git a/drivers/net/ethernet/mipsnet.c b/drivers/net/ethernet/mipsnet.c deleted file mode 100644 index db5285befe2..00000000000 --- a/drivers/net/ethernet/mipsnet.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/platform_device.h> -#include <asm/mips-boards/simint.h> - -#define MIPSNET_VERSION "2007-11-17" - -/* - * Net status/control block as seen by sw in the core. - */ -struct mipsnet_regs { - /* - * Device info for probing, reads as MIPSNET%d where %d is some - * form of version. - */ - u64 devId; /*0x00 */ - - /* - * read only busy flag. - * Set and cleared by the Net Device to indicate that an rx or a tx - * is in progress. - */ - u32 busy; /*0x08 */ - - /* - * Set by the Net Device. - * The device will set it once data has been received. - * The value is the number of bytes that should be read from - * rxDataBuffer. The value will decrease till 0 until all the data - * from rxDataBuffer has been read. - */ - u32 rxDataCount; /*0x0c */ -#define MIPSNET_MAX_RXTX_DATACOUNT (1 << 16) - - /* - * Settable from the MIPS core, cleared by the Net Device. - * The core should set the number of bytes it wants to send, - * then it should write those bytes of data to txDataBuffer. - * The device will clear txDataCount has been processed (not - * necessarily sent). - */ - u32 txDataCount; /*0x10 */ - - /* - * Interrupt control - * - * Used to clear the interrupted generated by this dev. - * Write a 1 to clear the interrupt. (except bit31). - * - * Bit0 is set if it was a tx-done interrupt. - * Bit1 is set when new rx-data is available. - * Until this bit is cleared there will be no other RXs. - * - * Bit31 is used for testing, it clears after a read. - * Writing 1 to this bit will cause an interrupt to be generated. - * To clear the test interrupt, write 0 to this register. - */ - u32 interruptControl; /*0x14 */ -#define MIPSNET_INTCTL_TXDONE (1u << 0) -#define MIPSNET_INTCTL_RXDONE (1u << 1) -#define MIPSNET_INTCTL_TESTBIT (1u << 31) - - /* - * Readonly core-specific interrupt info for the device to signal - * the core. The meaning of the contents of this field might change. - */ - /* XXX: the whole memIntf interrupt scheme is messy: the device - * should have no control what so ever of what VPE/register set is - * being used. - * The MemIntf should only expose interrupt lines, and something in - * the config should be responsible for the line<->core/vpe bindings. - */ - u32 interruptInfo; /*0x18 */ - - /* - * This is where the received data is read out. - * There is more data to read until rxDataReady is 0. - * Only 1 byte at this regs offset is used. - */ - u32 rxDataBuffer; /*0x1c */ - - /* - * This is where the data to transmit is written. - * Data should be written for the amount specified in the - * txDataCount register. - * Only 1 byte at this regs offset is used. - */ - u32 txDataBuffer; /*0x20 */ -}; - -#define regaddr(dev, field) \ - (dev->base_addr + offsetof(struct mipsnet_regs, field)) - -static char mipsnet_string[] = "mipsnet"; - -/* - * Copy data from the MIPSNET rx data port - */ -static int ioiocpy_frommipsnet(struct net_device *dev, unsigned char *kdata, - int len) -{ - for (; len > 0; len--, kdata++) - *kdata = inb(regaddr(dev, rxDataBuffer)); - - return inl(regaddr(dev, rxDataCount)); -} - -static inline void mipsnet_put_todevice(struct net_device *dev, - struct sk_buff *skb) -{ - int count_to_go = skb->len; - char *buf_ptr = skb->data; - - outl(skb->len, regaddr(dev, txDataCount)); - - for (; count_to_go; buf_ptr++, count_to_go--) - outb(*buf_ptr, regaddr(dev, txDataBuffer)); - - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - - dev_kfree_skb(skb); -} - -static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev) -{ - /* - * Only one packet at a time. Once TXDONE interrupt is serviced, the - * queue will be restarted. - */ - netif_stop_queue(dev); - mipsnet_put_todevice(dev, skb); - - return NETDEV_TX_OK; -} - -static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t len) -{ - struct sk_buff *skb; - - if (!len) - return len; - - skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN); - if (!skb) { - dev->stats.rx_dropped++; - return -ENOMEM; - } - - skb_reserve(skb, NET_IP_ALIGN); - if (ioiocpy_frommipsnet(dev, skb_put(skb, len), len)) - return -EFAULT; - - skb->protocol = eth_type_trans(skb, dev); - skb->ip_summed = CHECKSUM_UNNECESSARY; - - netif_rx(skb); - - dev->stats.rx_packets++; - dev->stats.rx_bytes += len; - - return len; -} - -static irqreturn_t mipsnet_interrupt(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - u32 int_flags; - irqreturn_t ret = IRQ_NONE; - - if (irq != dev->irq) - goto out_badirq; - - /* TESTBIT is cleared on read. */ - int_flags = inl(regaddr(dev, interruptControl)); - if (int_flags & MIPSNET_INTCTL_TESTBIT) { - /* TESTBIT takes effect after a write with 0. */ - outl(0, regaddr(dev, interruptControl)); - ret = IRQ_HANDLED; - } else if (int_flags & MIPSNET_INTCTL_TXDONE) { - /* Only one packet at a time, we are done. */ - dev->stats.tx_packets++; - netif_wake_queue(dev); - outl(MIPSNET_INTCTL_TXDONE, - regaddr(dev, interruptControl)); - ret = IRQ_HANDLED; - } else if (int_flags & MIPSNET_INTCTL_RXDONE) { - mipsnet_get_fromdev(dev, inl(regaddr(dev, rxDataCount))); - outl(MIPSNET_INTCTL_RXDONE, regaddr(dev, interruptControl)); - ret = IRQ_HANDLED; - } - return ret; - -out_badirq: - printk(KERN_INFO "%s: %s(): irq %d for unknown device\n", - dev->name, __func__, irq); - return ret; -} - -static int mipsnet_open(struct net_device *dev) -{ - int err; - - err = request_irq(dev->irq, mipsnet_interrupt, - IRQF_SHARED, dev->name, (void *) dev); - if (err) { - release_region(dev->base_addr, sizeof(struct mipsnet_regs)); - return err; - } - - netif_start_queue(dev); - - /* test interrupt handler */ - outl(MIPSNET_INTCTL_TESTBIT, regaddr(dev, interruptControl)); - - return 0; -} - -static int mipsnet_close(struct net_device *dev) -{ - netif_stop_queue(dev); - free_irq(dev->irq, dev); - return 0; -} - -static void mipsnet_set_mclist(struct net_device *dev) -{ -} - -static const struct net_device_ops mipsnet_netdev_ops = { - .ndo_open = mipsnet_open, - .ndo_stop = mipsnet_close, - .ndo_start_xmit = mipsnet_xmit, - .ndo_set_rx_mode = mipsnet_set_mclist, - .ndo_change_mtu = eth_change_mtu, - .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, -}; - -static int __devinit mipsnet_probe(struct platform_device *dev) -{ - struct net_device *netdev; - int err; - - netdev = alloc_etherdev(0); - if (!netdev) { - err = -ENOMEM; - goto out; - } - - platform_set_drvdata(dev, netdev); - - netdev->netdev_ops = &mipsnet_netdev_ops; - - /* - * TODO: probe for these or load them from PARAM - */ - netdev->base_addr = 0x4200; - netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 + - inl(regaddr(netdev, interruptInfo)); - - /* Get the io region now, get irq on open() */ - if (!request_region(netdev->base_addr, sizeof(struct mipsnet_regs), - "mipsnet")) { - err = -EBUSY; - goto out_free_netdev; - } - - /* - * Lacking any better mechanism to allocate a MAC address we use a - * random one ... - */ - eth_hw_addr_random(netdev); - - err = register_netdev(netdev); - if (err) { - printk(KERN_ERR "MIPSNet: failed to register netdev.\n"); - goto out_free_region; - } - - return 0; - -out_free_region: - release_region(netdev->base_addr, sizeof(struct mipsnet_regs)); - -out_free_netdev: - free_netdev(netdev); - -out: - return err; -} - -static int __devexit mipsnet_device_remove(struct platform_device *device) -{ - struct net_device *dev = platform_get_drvdata(device); - - unregister_netdev(dev); - release_region(dev->base_addr, sizeof(struct mipsnet_regs)); - free_netdev(dev); - platform_set_drvdata(device, NULL); - - return 0; -} - -static struct platform_driver mipsnet_driver = { - .driver = { - .name = mipsnet_string, - .owner = THIS_MODULE, - }, - .probe = mipsnet_probe, - .remove = __devexit_p(mipsnet_device_remove), -}; - -static int __init mipsnet_init_module(void) -{ - int err; - - printk(KERN_INFO "MIPSNet Ethernet driver. Version: %s. " - "(c)2005 MIPS Technologies, Inc.\n", MIPSNET_VERSION); - - err = platform_driver_register(&mipsnet_driver); - if (err) - printk(KERN_ERR "Driver registration failed\n"); - - return err; -} - -static void __exit mipsnet_exit_module(void) -{ - platform_driver_unregister(&mipsnet_driver); -} - -module_init(mipsnet_init_module); -module_exit(mipsnet_exit_module); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index fa85cf1353f..83516e3369c 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1078,22 +1078,16 @@ static int myri10ge_reset(struct myri10ge_priv *mgp) #ifdef CONFIG_MYRI10GE_DCA static int myri10ge_toggle_relaxed(struct pci_dev *pdev, int on) { - int ret, cap, err; + int ret; u16 ctl; - cap = pci_pcie_cap(pdev); - if (!cap) - return 0; - - err = pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl); - if (err) - return 0; + pcie_capability_read_word(pdev, PCI_EXP_DEVCTL, &ctl); ret = (ctl & PCI_EXP_DEVCTL_RELAX_EN) >> 4; if (ret != on) { ctl &= ~PCI_EXP_DEVCTL_RELAX_EN; ctl |= (on << 4); - pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl); + pcie_capability_write_word(pdev, PCI_EXP_DEVCTL, ctl); } return ret; } @@ -3192,18 +3186,13 @@ static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) struct device *dev = &mgp->pdev->dev; int cap; unsigned err_cap; - u16 val; - u8 ext_type; int ret; if (!myri10ge_ecrc_enable || !bridge) return; /* check that the bridge is a root port */ - cap = pci_pcie_cap(bridge); - pci_read_config_word(bridge, cap + PCI_CAP_FLAGS, &val); - ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4; - if (ext_type != PCI_EXP_TYPE_ROOT_PORT) { + if (pci_pcie_type(bridge) != PCI_EXP_TYPE_ROOT_PORT) { if (myri10ge_ecrc_enable > 1) { struct pci_dev *prev_bridge, *old_bridge = bridge; @@ -3218,11 +3207,8 @@ static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) " to force ECRC\n"); return; } - cap = pci_pcie_cap(bridge); - pci_read_config_word(bridge, - cap + PCI_CAP_FLAGS, &val); - ext_type = (val & PCI_EXP_FLAGS_TYPE) >> 4; - } while (ext_type != PCI_EXP_TYPE_ROOT_PORT); + } while (pci_pcie_type(bridge) != + PCI_EXP_TYPE_ROOT_PORT); dev_info(dev, "Forcing ECRC on non-root port %s" @@ -3335,11 +3321,10 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) int overridden = 0; if (myri10ge_force_firmware == 0) { - int link_width, exp_cap; + int link_width; u16 lnk; - exp_cap = pci_pcie_cap(mgp->pdev); - pci_read_config_word(mgp->pdev, exp_cap + PCI_EXP_LNKSTA, &lnk); + pcie_capability_read_word(mgp->pdev, PCI_EXP_LNKSTA, &lnk); link_width = (lnk >> 4) & 0x3f; /* Check to see if Link is less than 8 or if the diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index d958c229937..de50547c187 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -484,7 +484,7 @@ static DEFINE_PCI_DEVICE_TABLE(s2io_tbl) = { MODULE_DEVICE_TABLE(pci, s2io_tbl); -static struct pci_error_handlers s2io_err_handler = { +static const struct pci_error_handlers s2io_err_handler = { .error_detected = s2io_io_error_detected, .slot_reset = s2io_io_slot_reset, .resume = s2io_io_resume, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 32d06824fe3..c2e420a84d2 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -757,7 +757,7 @@ __vxge_hw_verify_pci_e_info(struct __vxge_hw_device *hldev) u16 lnk; /* Get the negotiated link width and speed from PCI config space */ - pci_read_config_word(dev, dev->pcie_cap + PCI_EXP_LNKSTA, &lnk); + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnk); if ((lnk & PCI_EXP_LNKSTA_CLS) != 1) return VXGE_HW_ERR_INVALID_PCI_INFO; @@ -1982,7 +1982,7 @@ u16 vxge_hw_device_link_width_get(struct __vxge_hw_device *hldev) struct pci_dev *dev = hldev->pdev; u16 lnk; - pci_read_config_word(dev, dev->pcie_cap + PCI_EXP_LNKSTA, &lnk); + pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnk); return (lnk & VXGE_HW_PCI_EXP_LNKCAP_LNK_WIDTH) >> 4; } diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index de219044351..3e5b7509502 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3521,7 +3521,7 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) strncpy(buf, dev->name, IFNAMSIZ); - flush_work_sync(&vdev->reset_task); + flush_work(&vdev->reset_task); /* in 2.6 will call stop() if device is up */ unregister_netdev(dev); @@ -4799,7 +4799,7 @@ static void __devexit vxge_remove(struct pci_dev *pdev) __LINE__); } -static struct pci_error_handlers vxge_err_handler = { +static const struct pci_error_handlers vxge_err_handler = { .error_detected = vxge_io_error_detected, .slot_reset = vxge_io_slot_reset, .resume = vxge_io_resume, diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c index 9d11ab7521b..63e7af44366 100644 --- a/drivers/net/ethernet/netx-eth.c +++ b/drivers/net/ethernet/netx-eth.c @@ -34,7 +34,7 @@ #include <mach/netx-regs.h> #include <mach/pfifo.h> #include <mach/xc.h> -#include <mach/eth.h> +#include <linux/platform_data/eth-netx.h> /* XC Fifo Offsets */ #define EMPTY_PTR_FIFO(xcno) (0 + ((xcno) << 3)) /* Index of the empty pointer FIFO */ diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index f45def01a98..876beceaf2d 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -3409,7 +3409,7 @@ set_speed: pause_flags = 0; /* setup pause frame */ - if (np->duplex != 0) { + if (netif_running(dev) && (np->duplex != 0)) { if (np->autoneg && np->pause_flags & NV_PAUSEFRAME_AUTONEG) { adv_pause = adv & (ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM); lpa_pause = lpa & (LPA_PAUSE_CAP | LPA_PAUSE_ASYM); @@ -4435,7 +4435,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void regs->version = FORCEDETH_REGS_VER; spin_lock_irq(&np->lock); - for (i = 0; i <= np->register_size/sizeof(u32); i++) + for (i = 0; i < np->register_size/sizeof(u32); i++) rbuf[i] = readl(base + i*sizeof(u32)); spin_unlock_irq(&np->lock); } @@ -5455,6 +5455,7 @@ static int nv_close(struct net_device *dev) netif_stop_queue(dev); spin_lock_irq(&np->lock); + nv_update_pause(dev, 0); /* otherwise stop_tx bricks NIC */ nv_stop_rxtx(dev); nv_txrx_reset(dev); @@ -5904,11 +5905,19 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + netif_carrier_off(dev); + + /* Some NICs freeze when TX pause is enabled while NIC is + * down, and this stays across warm reboots. The sequence + * below should be enough to recover from that state. + */ + nv_update_pause(dev, 0); + nv_start_tx(dev); + nv_stop_tx(dev); + if (id->driver_data & DEV_HAS_VLAN) nv_vlan_mode(dev, dev->features); - netif_carrier_off(dev); - dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", dev->name, np->phy_oui, np->phyaddr, dev->dev_addr); diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c index feb85d56c75..b2a94d02a52 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c @@ -2795,7 +2795,7 @@ static const struct dev_pm_ops pch_gbe_pm_ops = { }; #endif -static struct pci_error_handlers pch_gbe_err_handler = { +static const struct pci_error_handlers pch_gbe_err_handler = { .error_detected = pch_gbe_io_error_detected, .slot_reset = pch_gbe_io_slot_reset, .resume = pch_gbe_io_resume diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index a77c558d8f4..df450616ab3 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1386,7 +1386,7 @@ static void netxen_mask_aer_correctable(struct netxen_adapter *adapter) adapter->ahw.board_type != NETXEN_BRDTYPE_P3_10G_TP) return; - if (root->pcie_type != PCI_EXP_TYPE_ROOT_PORT) + if (pci_pcie_type(root) != PCI_EXP_TYPE_ROOT_PORT) return; aer_pos = pci_find_ext_capability(root, PCI_EXT_CAP_ID_ERR); @@ -3340,7 +3340,7 @@ netxen_free_vlan_ip_list(struct netxen_adapter *adapter) { } #endif -static struct pci_error_handlers netxen_err_handler = { +static const struct pci_error_handlers netxen_err_handler = { .error_detected = netxen_io_error_detected, .slot_reset = netxen_io_slot_reset, .resume = netxen_io_resume, diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 212c1219327..473ce134ca6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4522,7 +4522,7 @@ static void qlcnic_restore_indev_addr(struct net_device *dev, unsigned long event) { } #endif -static struct pci_error_handlers qlcnic_err_handler = { +static const struct pci_error_handlers qlcnic_err_handler = { .error_detected = qlcnic_io_error_detected, .slot_reset = qlcnic_io_slot_reset, .resume = qlcnic_io_resume, diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index b53a3b60b64..b262d615681 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4847,7 +4847,7 @@ static void qlge_io_resume(struct pci_dev *pdev) netif_device_attach(ndev); } -static struct pci_error_handlers qlge_err_handler = { +static const struct pci_error_handlers qlge_err_handler = { .error_detected = qlge_io_error_detected, .slot_reset = qlge_io_slot_reset, .resume = qlge_io_resume, diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index b47d5b35024..e7ff886e804 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -77,7 +77,7 @@ static const int multicast_filter_limit = 32; #define MAX_READ_REQUEST_SHIFT 12 -#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ +#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */ #define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */ #define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */ @@ -287,6 +287,8 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8167), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8168), 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8169), 0, 0, RTL_CFG_0 }, + { PCI_VENDOR_ID_DLINK, 0x4300, + PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4300), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_DLINK, 0x4302), 0, 0, RTL_CFG_0 }, { PCI_DEVICE(PCI_VENDOR_ID_AT, 0xc107), 0, 0, RTL_CFG_0 }, @@ -833,15 +835,8 @@ static void rtl_unlock_work(struct rtl8169_private *tp) static void rtl_tx_performance_tweak(struct pci_dev *pdev, u16 force) { - int cap = pci_pcie_cap(pdev); - - if (cap) { - u16 ctl; - - pci_read_config_word(pdev, cap + PCI_EXP_DEVCTL, &ctl); - ctl = (ctl & ~PCI_EXP_DEVCTL_READRQ) | force; - pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, ctl); - } + pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_READRQ, force); } struct rtl_cond { @@ -4739,28 +4734,14 @@ static void rtl_ephy_init(struct rtl8169_private *tp, const struct ephy_info *e, static void rtl_disable_clock_request(struct pci_dev *pdev) { - int cap = pci_pcie_cap(pdev); - - if (cap) { - u16 ctl; - - pci_read_config_word(pdev, cap + PCI_EXP_LNKCTL, &ctl); - ctl &= ~PCI_EXP_LNKCTL_CLKREQ_EN; - pci_write_config_word(pdev, cap + PCI_EXP_LNKCTL, ctl); - } + pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN); } static void rtl_enable_clock_request(struct pci_dev *pdev) { - int cap = pci_pcie_cap(pdev); - - if (cap) { - u16 ctl; - - pci_read_config_word(pdev, cap + PCI_EXP_LNKCTL, &ctl); - ctl |= PCI_EXP_LNKCTL_CLKREQ_EN; - pci_write_config_word(pdev, cap + PCI_EXP_LNKCTL, ctl); - } + pcie_capability_set_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_CLKREQ_EN); } #define R8168_CPCMD_QUIRK_MASK (\ @@ -5405,14 +5386,9 @@ static void rtl_hw_start_8101(struct net_device *dev) tp->event_slow &= ~RxFIFOOver; if (tp->mac_version == RTL_GIGA_MAC_VER_13 || - tp->mac_version == RTL_GIGA_MAC_VER_16) { - int cap = pci_pcie_cap(pdev); - - if (cap) { - pci_write_config_word(pdev, cap + PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_NOSNOOP_EN); - } - } + tp->mac_version == RTL_GIGA_MAC_VER_16) + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_NOSNOOP_EN); RTL_W8(Cfg9346, Cfg9346_Unlock); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index df808ac8cb6..6a40dd03a32 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -99,13 +99,13 @@ typedef enum { * The SEEQ8005 doesn't like us writing to its registers * too quickly. */ -static inline void ether3_outb(int v, const void __iomem *r) +static inline void ether3_outb(int v, void __iomem *r) { writeb(v, r); udelay(1); } -static inline void ether3_outw(int v, const void __iomem *r) +static inline void ether3_outw(int v, void __iomem *r) { writew(v, r); udelay(1); diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index fb3cbc27063..25906c1d1b1 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -34,3 +34,10 @@ config SFC_SRIOV This enables support for the SFC9000 I/O Virtualization features, allowing accelerated network performance in virtualized environments. +config SFC_PTP + bool "Solarflare SFC9000-family PTP support" + depends on SFC && PTP_1588_CLOCK && !(SFC=y && PTP_1588_CLOCK=m) + default y + ---help--- + This enables support for the Precision Time Protocol (PTP) + on SFC9000-family NICs diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index ea1f8db5731..e11f2ecf69d 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -5,5 +5,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \ mcdi.o mcdi_phy.o mcdi_mon.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o +sfc-$(CONFIG_SFC_PTP) += ptp.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index b26a954c27f..5400a33f254 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -120,10 +120,10 @@ typedef union efx_oword { * [0,high-low), with garbage in bits [high-low+1,...). */ #define EFX_EXTRACT_NATIVE(native_element, min, max, low, high) \ - (((low > max) || (high < min)) ? 0 : \ - ((low > min) ? \ - ((native_element) >> (low - min)) : \ - ((native_element) << (min - low)))) + ((low) > (max) || (high) < (min) ? 0 : \ + (low) > (min) ? \ + (native_element) >> ((low) - (min)) : \ + (native_element) << ((min) - (low))) /* * Extract bit field portion [low,high) from the 64-bit little-endian @@ -142,27 +142,27 @@ typedef union efx_oword { #define EFX_EXTRACT_OWORD64(oword, low, high) \ ((EFX_EXTRACT64((oword).u64[0], 0, 63, low, high) | \ EFX_EXTRACT64((oword).u64[1], 64, 127, low, high)) & \ - EFX_MASK64(high + 1 - low)) + EFX_MASK64((high) + 1 - (low))) #define EFX_EXTRACT_QWORD64(qword, low, high) \ (EFX_EXTRACT64((qword).u64[0], 0, 63, low, high) & \ - EFX_MASK64(high + 1 - low)) + EFX_MASK64((high) + 1 - (low))) #define EFX_EXTRACT_OWORD32(oword, low, high) \ ((EFX_EXTRACT32((oword).u32[0], 0, 31, low, high) | \ EFX_EXTRACT32((oword).u32[1], 32, 63, low, high) | \ EFX_EXTRACT32((oword).u32[2], 64, 95, low, high) | \ EFX_EXTRACT32((oword).u32[3], 96, 127, low, high)) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_EXTRACT_QWORD32(qword, low, high) \ ((EFX_EXTRACT32((qword).u32[0], 0, 31, low, high) | \ EFX_EXTRACT32((qword).u32[1], 32, 63, low, high)) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_EXTRACT_DWORD(dword, low, high) \ (EFX_EXTRACT32((dword).u32[0], 0, 31, low, high) & \ - EFX_MASK32(high + 1 - low)) + EFX_MASK32((high) + 1 - (low))) #define EFX_OWORD_FIELD64(oword, field) \ EFX_EXTRACT_OWORD64(oword, EFX_LOW_BIT(field), \ @@ -442,10 +442,10 @@ typedef union efx_oword { cpu_to_le32(EFX_INSERT_NATIVE(min, max, low, high, value)) #define EFX_INPLACE_MASK64(min, max, low, high) \ - EFX_INSERT64(min, max, low, high, EFX_MASK64(high + 1 - low)) + EFX_INSERT64(min, max, low, high, EFX_MASK64((high) + 1 - (low))) #define EFX_INPLACE_MASK32(min, max, low, high) \ - EFX_INSERT32(min, max, low, high, EFX_MASK32(high + 1 - low)) + EFX_INSERT32(min, max, low, high, EFX_MASK32((high) + 1 - (low))) #define EFX_SET_OWORD64(oword, low, high, value) do { \ (oword).u64[0] = (((oword).u64[0] \ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 65a8d49106a..96bd980e828 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ - if ((efx->state == STATE_RUNNING) || \ + if ((efx->state == STATE_READY) || \ (efx->state == STATE_DISABLED)) \ ASSERT_RTNL(); \ } while (0) +static int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + /************************************************************************** * * Event queue processing @@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_buffer_order = get_order(efx->rx_buffer_len + sizeof(struct efx_rx_page_state)); + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + /* Initialise the channels */ efx_for_each_channel(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) @@ -714,6 +734,7 @@ static void efx_remove_channel(struct efx_channel *channel) efx_for_each_possible_channel_tx_queue(tx_queue, channel) efx_remove_tx_queue(tx_queue); efx_remove_eventq(channel); + channel->type->post_remove(channel); } static void efx_remove_channels(struct efx_nic *efx) @@ -730,7 +751,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; - int rc = 0; + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; /* Not all channels should be reallocated. We must avoid * reallocating their buffer table entries. @@ -828,6 +853,7 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) static const struct efx_channel_type efx_default_channel_type = { .pre_probe = efx_channel_dummy_op_int, + .post_remove = efx_channel_dummy_op_void, .get_name = efx_get_channel_name, .copy = efx_copy_channel, .keep_eventq = false, @@ -838,6 +864,10 @@ int efx_channel_dummy_op_int(struct efx_channel *channel) return 0; } +void efx_channel_dummy_op_void(struct efx_channel *channel) +{ +} + /************************************************************************** * * Port handling @@ -1365,6 +1395,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + BUG_ON(efx->state == STATE_DISABLED); + if (efx->legacy_irq) efx->legacy_irq_enabled = true; efx_nic_enable_interrupts(efx); @@ -1382,6 +1414,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + if (efx->state == STATE_DISABLED) + return; + efx_mcdi_mode_poll(efx); efx_nic_disable_interrupts(efx); @@ -1422,10 +1457,16 @@ static void efx_set_channels(struct efx_nic *efx) efx->tx_channel_offset = separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0; - /* We need to adjust the TX queue numbers if we have separate + /* We need to mark which channels really have RX and TX + * queues, and adjust the TX queue numbers if we have separate * RX-only and TX-only channels. */ efx_for_each_channel(channel, efx) { + if (channel->channel < efx->n_rx_channels) + channel->rx_queue.core_index = channel->channel; + else + channel->rx_queue.core_index = -1; + efx_for_each_channel_tx_queue(tx_queue, channel) tx_queue->queue -= (efx->tx_channel_offset * EFX_TXQ_TYPES); @@ -1533,22 +1574,21 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* Called after previous invocation(s) of efx_stop_all, restarts the port, - * kernel transmit queues and NAPI processing, and ensures that the port is - * scheduled to be reconfigured. This function is safe to call multiple - * times when the NIC is in any state. +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. */ static void efx_start_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); /* Check that it is appropriate to restart the interface. All * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled) - return; - if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) - return; - if (!netif_running(efx->net_dev)) + if (efx->port_enabled || !netif_running(efx->net_dev)) return; efx_start_port(efx); @@ -1582,11 +1622,11 @@ static void efx_flush_all(struct efx_nic *efx) cancel_work_sync(&efx->mac_work); } -/* Quiesce hardware and software without bringing the link down. - * Safe to call multiple times, when the nic and interface is in any - * state. The caller is guaranteed to subsequently be in a position - * to modify any hardware and software state they see fit without - * taking locks. */ +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ static void efx_stop_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); @@ -1739,7 +1779,8 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) struct efx_nic *efx = netdev_priv(net_dev); struct mii_ioctl_data *data = if_mii(ifr); - EFX_ASSERT_RESET_SERIALISED(efx); + if (cmd == SIOCSHWTSTAMP) + return efx_ptp_ioctl(efx, ifr, cmd); /* Convert phy_id from older PRTAD/DEVAD format */ if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && @@ -1820,13 +1861,14 @@ static void efx_netpoll(struct net_device *net_dev) static int efx_net_open(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); - EFX_ASSERT_RESET_SERIALISED(efx); + int rc; netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", raw_smp_processor_id()); - if (efx->state == STATE_DISABLED) - return -EIO; + rc = efx_check_disabled(efx); + if (rc) + return rc; if (efx->phy_mode & PHY_MODE_SPECIAL) return -EBUSY; if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) @@ -1852,10 +1894,8 @@ static int efx_net_stop(struct net_device *net_dev) netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", raw_smp_processor_id()); - if (efx->state != STATE_DISABLED) { - /* Stop the device and flush all the channels */ - efx_stop_all(efx); - } + /* Stop the device and flush all the channels */ + efx_stop_all(efx); return 0; } @@ -1915,9 +1955,11 @@ static void efx_watchdog(struct net_device *net_dev) static int efx_change_mtu(struct net_device *net_dev, int new_mtu) { struct efx_nic *efx = netdev_priv(net_dev); + int rc; - EFX_ASSERT_RESET_SERIALISED(efx); - + rc = efx_check_disabled(efx); + if (rc) + return rc; if (new_mtu > EFX_MAX_MTU) return -EINVAL; @@ -1926,8 +1968,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); mutex_lock(&efx->mac_lock); - /* Reconfigure the MAC before enabling the dma queues so that - * the RX buffers don't overflow */ net_dev->mtu = new_mtu; efx->type->reconfigure_mac(efx); mutex_unlock(&efx->mac_lock); @@ -1942,8 +1982,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) struct sockaddr *addr = data; char *new_addr = addr->sa_data; - EFX_ASSERT_RESET_SERIALISED(efx); - if (!is_valid_ether_addr(new_addr)) { netif_err(efx, drv, efx->net_dev, "invalid ethernet MAC address requested: %pM\n", @@ -2079,11 +2117,27 @@ static int efx_register_netdev(struct efx_nic *efx) rtnl_lock(); + /* Enable resets to be scheduled and check whether any were + * already requested. If so, the NIC is probably hosed so we + * abort. + */ + efx->state = STATE_READY; + smp_mb(); /* ensure we change state before checking reset_pending */ + if (efx->reset_pending) { + netif_err(efx, probe, efx->net_dev, + "aborting probe due to scheduled reset\n"); + rc = -EIO; + goto fail_locked; + } + rc = dev_alloc_name(net_dev, net_dev->name); if (rc < 0) goto fail_locked; efx_update_name(efx); + /* Always start with carrier off; PHY events will detect the link */ + netif_carrier_off(net_dev); + rc = register_netdevice(net_dev); if (rc) goto fail_locked; @@ -2094,9 +2148,6 @@ static int efx_register_netdev(struct efx_nic *efx) efx_init_tx_queue_core_txq(tx_queue); } - /* Always start with carrier off; PHY events will detect the link */ - netif_carrier_off(net_dev); - rtnl_unlock(); rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); @@ -2108,14 +2159,14 @@ static int efx_register_netdev(struct efx_nic *efx) return 0; +fail_registered: + rtnl_lock(); + unregister_netdevice(net_dev); fail_locked: + efx->state = STATE_UNINIT; rtnl_unlock(); netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); return rc; - -fail_registered: - unregister_netdev(net_dev); - return rc; } static void efx_unregister_netdev(struct efx_nic *efx) @@ -2138,7 +2189,11 @@ static void efx_unregister_netdev(struct efx_nic *efx) strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); - unregister_netdev(efx->net_dev); + + rtnl_lock(); + unregister_netdevice(efx->net_dev); + efx->state = STATE_UNINIT; + rtnl_unlock(); } /************************************************************************** @@ -2154,9 +2209,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) EFX_ASSERT_RESET_SERIALISED(efx); efx_stop_all(efx); - mutex_lock(&efx->mac_lock); - efx_stop_interrupts(efx, false); + + mutex_lock(&efx->mac_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) efx->phy_op->fini(efx); efx->type->fini(efx); @@ -2276,16 +2331,15 @@ static void efx_reset_work(struct work_struct *data) if (!pending) return; - /* If we're not RUNNING then don't reset. Leave the reset_pending - * flags set so that efx_pci_probe_main will be retried */ - if (efx->state != STATE_RUNNING) { - netif_info(efx, drv, efx->net_dev, - "scheduled reset quenched. NIC not RUNNING\n"); - return; - } - rtnl_lock(); - (void)efx_reset(efx, fls(pending) - 1); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, fls(pending) - 1); + rtnl_unlock(); } @@ -2311,6 +2365,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) } set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (ACCESS_ONCE(efx->state) != STATE_READY) + return; /* efx_process_channel() will no longer read events once a * reset is scheduled. So switch back to poll'd MCDI completions. */ @@ -2376,13 +2437,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = { /* This zeroes out and then fills in the invariants in a struct * efx_nic (including all sub-structures). */ -static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, +static int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, struct net_device *net_dev) { int i; /* Initialise common structures */ - memset(efx, 0, sizeof(*efx)); spin_lock_init(&efx->biu_lock); #ifdef CONFIG_SFC_MTD INIT_LIST_HEAD(&efx->mtd_list); @@ -2392,7 +2452,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); efx->pci_dev = pci_dev; efx->msg_enable = debug; - efx->state = STATE_INIT; + efx->state = STATE_UNINIT; strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); efx->net_dev = net_dev; @@ -2409,8 +2469,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, goto fail; } - efx->type = type; - EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); /* Higher numbered interrupt modes are less capable! */ @@ -2455,6 +2513,12 @@ static void efx_fini_struct(struct efx_nic *efx) */ static void efx_pci_remove_main(struct efx_nic *efx) { + /* Flush reset_work. It can no longer be scheduled since we + * are not READY. + */ + BUG_ON(efx->state == STATE_READY); + cancel_work_sync(&efx->reset_work); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); efx->net_dev->rx_cpu_rmap = NULL; @@ -2480,24 +2544,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) /* Mark the NIC as fini, then stop the interface */ rtnl_lock(); - efx->state = STATE_FINI; dev_close(efx->net_dev); - - /* Allow any queued efx_resets() to complete */ + efx_stop_interrupts(efx, false); rtnl_unlock(); - efx_stop_interrupts(efx, false); efx_sriov_fini(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); - /* Wait for any scheduled resets to complete. No more will be - * scheduled from this point because efx_stop_all() has been - * called, we are no longer registered with driverlink, and - * the net_device's have been removed. */ - cancel_work_sync(&efx->reset_work); - efx_pci_remove_main(efx); efx_fini_io(efx); @@ -2617,7 +2672,6 @@ static int efx_pci_probe_main(struct efx_nic *efx) static int __devinit efx_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *entry) { - const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data; struct net_device *net_dev; struct efx_nic *efx; int rc; @@ -2627,10 +2681,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, EFX_MAX_RX_QUEUES); if (!net_dev) return -ENOMEM; - net_dev->features |= (type->offload_features | NETIF_F_SG | + efx = netdev_priv(net_dev); + efx->type = (const struct efx_nic_type *) entry->driver_data; + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | @@ -2638,10 +2694,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, NETIF_F_RXCSUM); /* All offloads can be toggled */ net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA; - efx = netdev_priv(net_dev); pci_set_drvdata(pci_dev, efx); SET_NETDEV_DEV(net_dev, &pci_dev->dev); - rc = efx_init_struct(efx, type, pci_dev, net_dev); + rc = efx_init_struct(efx, pci_dev, net_dev); if (rc) goto fail1; @@ -2656,28 +2711,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, goto fail2; rc = efx_pci_probe_main(efx); - - /* Serialise against efx_reset(). No more resets will be - * scheduled since efx_stop_all() has been called, and we have - * not and never have been registered. - */ - cancel_work_sync(&efx->reset_work); - if (rc) goto fail3; - /* If there was a scheduled reset during probe, the NIC is - * probably hosed anyway. - */ - if (efx->reset_pending) { - rc = -EIO; - goto fail4; - } - - /* Switch to the running state before we expose the device to the OS, - * so that dev_open()|efx_start_all() will actually start the device */ - efx->state = STATE_RUNNING; - rc = efx_register_netdev(efx); if (rc) goto fail4; @@ -2717,12 +2753,18 @@ static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_FINI; + rtnl_lock(); - netif_device_detach(efx->net_dev); + if (efx->state != STATE_DISABLED) { + efx->state = STATE_UNINIT; - efx_stop_all(efx); - efx_stop_interrupts(efx, false); + netif_device_detach(efx->net_dev); + + efx_stop_all(efx); + efx_stop_interrupts(efx, false); + } + + rtnl_unlock(); return 0; } @@ -2731,21 +2773,25 @@ static int efx_pm_thaw(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_INIT; + rtnl_lock(); - efx_start_interrupts(efx, false); + if (efx->state != STATE_DISABLED) { + efx_start_interrupts(efx, false); - mutex_lock(&efx->mac_lock); - efx->phy_op->reconfigure(efx); - mutex_unlock(&efx->mac_lock); + mutex_lock(&efx->mac_lock); + efx->phy_op->reconfigure(efx); + mutex_unlock(&efx->mac_lock); - efx_start_all(efx); + efx_start_all(efx); - netif_device_attach(efx->net_dev); + netif_device_attach(efx->net_dev); - efx->state = STATE_RUNNING; + efx->state = STATE_READY; - efx->type->resume_wol(efx); + efx->type->resume_wol(efx); + } + + rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 70755c97251..f11170bc48b 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -102,6 +102,7 @@ static inline void efx_filter_rfs_expire(struct efx_channel *channel) {} /* Channels */ extern int efx_channel_dummy_op_int(struct efx_channel *channel); +extern void efx_channel_dummy_op_void(struct efx_channel *channel); extern void efx_process_channel_now(struct efx_channel *channel); extern int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 5faedd855b7..90f078eff8e 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -337,7 +337,8 @@ static int efx_fill_loopback_test(struct efx_nic *efx, unsigned int test_index, struct ethtool_string *strings, u64 *data) { - struct efx_channel *channel = efx_get_channel(efx, 0); + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); struct efx_tx_queue *tx_queue; efx_for_each_channel_tx_queue(tx_queue, channel) { @@ -529,9 +530,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev, if (!efx_tests) goto fail; - - ASSERT_RTNL(); - if (efx->state != STATE_RUNNING) { + if (efx->state != STATE_READY) { rc = -EIO; goto fail1; } @@ -962,9 +961,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, int rc; /* Check that user wants us to choose the location */ - if (rule->location != RX_CLS_LOC_ANY && - rule->location != RX_CLS_LOC_FIRST && - rule->location != RX_CLS_LOC_LAST) + if (rule->location != RX_CLS_LOC_ANY) return -EINVAL; /* Range-check ring_cookie */ @@ -978,9 +975,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, rule->m_ext.data[1])) return -EINVAL; - efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, - (rule->location == RX_CLS_LOC_FIRST) ? - EFX_FILTER_FLAG_RX_OVERRIDE_IP : 0, + efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0, (rule->ring_cookie == RX_CLS_FLOW_DISC) ? 0xfff : rule->ring_cookie); @@ -1176,6 +1171,7 @@ const struct ethtool_ops efx_ethtool_ops = { .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_indir = efx_ethtool_get_rxfh_indir, .set_rxfh_indir = efx_ethtool_set_rxfh_indir, + .get_ts_info = efx_ptp_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, }; diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 8687a6c3db0..ec1e99d0dca 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev, new_mode = PHY_MODE_SPECIAL; if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) { err = 0; - } else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) { + } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) { err = -EBUSY; } else { /* Reset the PHY, reconfigure the MAC and enable/disable diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c index c3fd61f0a95..8af42cd1fed 100644 --- a/drivers/net/ethernet/sfc/filter.c +++ b/drivers/net/ethernet/sfc/filter.c @@ -162,20 +162,12 @@ static void efx_filter_push_rx_config(struct efx_nic *efx) !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & EFX_FILTER_FLAG_RX_RSS)); EFX_SET_OWORD_FIELD( - filter_ctl, FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE, - !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags & - EFX_FILTER_FLAG_RX_OVERRIDE_IP)); - EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID, table->spec[EFX_FILTER_INDEX_MC_DEF].dmaq_id); EFX_SET_OWORD_FIELD( filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED, !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & EFX_FILTER_FLAG_RX_RSS)); - EFX_SET_OWORD_FIELD( - filter_ctl, FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE, - !!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags & - EFX_FILTER_FLAG_RX_OVERRIDE_IP)); } efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); @@ -480,14 +472,12 @@ static u32 efx_filter_build(efx_oword_t *filter, struct efx_filter_spec *spec) case EFX_FILTER_TABLE_RX_MAC: { bool is_wild = spec->type == EFX_FILTER_MAC_WILD; - EFX_POPULATE_OWORD_8( + EFX_POPULATE_OWORD_7( *filter, FRF_CZ_RMFT_RSS_EN, !!(spec->flags & EFX_FILTER_FLAG_RX_RSS), FRF_CZ_RMFT_SCATTER_EN, !!(spec->flags & EFX_FILTER_FLAG_RX_SCATTER), - FRF_CZ_RMFT_IP_OVERRIDE, - !!(spec->flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP), FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id, FRF_CZ_RMFT_WILDCARD_MATCH, is_wild, FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2], @@ -567,49 +557,62 @@ static int efx_filter_search(struct efx_filter_table *table, } /* - * Construct/deconstruct external filter IDs. These must be ordered - * by matching priority, for RX NFC semantics. + * Construct/deconstruct external filter IDs. At least the RX filter + * IDs must be ordered by matching priority, for RX NFC semantics. * - * Each RX MAC filter entry has a flag for whether it can override an - * RX IP filter that also matches. So we assign locations for MAC - * filters with overriding behaviour, then for IP filters, then for - * MAC filters without overriding behaviour. + * Deconstruction needs to be robust against invalid IDs so that + * efx_filter_remove_id_safe() and efx_filter_get_filter_safe() can + * accept user-provided IDs. */ -#define EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP 0 -#define EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP 1 -#define EFX_FILTER_MATCH_PRI_NORMAL_BASE 2 +#define EFX_FILTER_MATCH_PRI_COUNT 5 + +static const u8 efx_filter_type_match_pri[EFX_FILTER_TYPE_COUNT] = { + [EFX_FILTER_TCP_FULL] = 0, + [EFX_FILTER_UDP_FULL] = 0, + [EFX_FILTER_TCP_WILD] = 1, + [EFX_FILTER_UDP_WILD] = 1, + [EFX_FILTER_MAC_FULL] = 2, + [EFX_FILTER_MAC_WILD] = 3, + [EFX_FILTER_UC_DEF] = 4, + [EFX_FILTER_MC_DEF] = 4, +}; + +static const enum efx_filter_table_id efx_filter_range_table[] = { + EFX_FILTER_TABLE_RX_IP, /* RX match pri 0 */ + EFX_FILTER_TABLE_RX_IP, + EFX_FILTER_TABLE_RX_MAC, + EFX_FILTER_TABLE_RX_MAC, + EFX_FILTER_TABLE_RX_DEF, /* RX match pri 4 */ + EFX_FILTER_TABLE_COUNT, /* TX match pri 0; invalid */ + EFX_FILTER_TABLE_COUNT, /* invalid */ + EFX_FILTER_TABLE_TX_MAC, + EFX_FILTER_TABLE_TX_MAC, /* TX match pri 3 */ +}; #define EFX_FILTER_INDEX_WIDTH 13 #define EFX_FILTER_INDEX_MASK ((1 << EFX_FILTER_INDEX_WIDTH) - 1) -static inline u32 efx_filter_make_id(enum efx_filter_table_id table_id, - unsigned int index, u8 flags) +static inline u32 +efx_filter_make_id(const struct efx_filter_spec *spec, unsigned int index) { - unsigned int match_pri = EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id; + unsigned int range; - if (flags & EFX_FILTER_FLAG_RX_OVERRIDE_IP) { - if (table_id == EFX_FILTER_TABLE_RX_MAC) - match_pri = EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP; - else if (table_id == EFX_FILTER_TABLE_RX_DEF) - match_pri = EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP; - } + range = efx_filter_type_match_pri[spec->type]; + if (!(spec->flags & EFX_FILTER_FLAG_RX)) + range += EFX_FILTER_MATCH_PRI_COUNT; - return match_pri << EFX_FILTER_INDEX_WIDTH | index; + return range << EFX_FILTER_INDEX_WIDTH | index; } static inline enum efx_filter_table_id efx_filter_id_table_id(u32 id) { - unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; + unsigned int range = id >> EFX_FILTER_INDEX_WIDTH; - switch (match_pri) { - case EFX_FILTER_MATCH_PRI_RX_MAC_OVERRIDE_IP: - return EFX_FILTER_TABLE_RX_MAC; - case EFX_FILTER_MATCH_PRI_RX_DEF_OVERRIDE_IP: - return EFX_FILTER_TABLE_RX_DEF; - default: - return match_pri - EFX_FILTER_MATCH_PRI_NORMAL_BASE; - } + if (range < ARRAY_SIZE(efx_filter_range_table)) + return efx_filter_range_table[range]; + else + return EFX_FILTER_TABLE_COUNT; /* invalid */ } static inline unsigned int efx_filter_id_index(u32 id) @@ -619,12 +622,9 @@ static inline unsigned int efx_filter_id_index(u32 id) static inline u8 efx_filter_id_flags(u32 id) { - unsigned int match_pri = id >> EFX_FILTER_INDEX_WIDTH; + unsigned int range = id >> EFX_FILTER_INDEX_WIDTH; - if (match_pri < EFX_FILTER_MATCH_PRI_NORMAL_BASE) - return EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_OVERRIDE_IP; - else if (match_pri <= - EFX_FILTER_MATCH_PRI_NORMAL_BASE + EFX_FILTER_TABLE_RX_DEF) + if (range < EFX_FILTER_MATCH_PRI_COUNT) return EFX_FILTER_FLAG_RX; else return EFX_FILTER_FLAG_TX; @@ -633,14 +633,15 @@ static inline u8 efx_filter_id_flags(u32 id) u32 efx_filter_get_rx_id_limit(struct efx_nic *efx) { struct efx_filter_state *state = efx->filter_state; - unsigned int table_id = EFX_FILTER_TABLE_RX_DEF; + unsigned int range = EFX_FILTER_MATCH_PRI_COUNT - 1; + enum efx_filter_table_id table_id; do { + table_id = efx_filter_range_table[range]; if (state->table[table_id].size != 0) - return ((EFX_FILTER_MATCH_PRI_NORMAL_BASE + table_id) - << EFX_FILTER_INDEX_WIDTH) + + return range << EFX_FILTER_INDEX_WIDTH | state->table[table_id].size; - } while (table_id--); + } while (range--); return 0; } @@ -718,7 +719,7 @@ s32 efx_filter_insert_filter(struct efx_nic *efx, struct efx_filter_spec *spec, netif_vdbg(efx, hw, efx->net_dev, "%s: filter type %d index %d rxq %u set", __func__, spec->type, filter_idx, spec->dmaq_id); - rc = efx_filter_make_id(table->id, filter_idx, spec->flags); + rc = efx_filter_make_id(spec, filter_idx); out: spin_unlock_bh(&state->lock); @@ -781,8 +782,7 @@ int efx_filter_remove_id_safe(struct efx_nic *efx, spin_lock_bh(&state->lock); if (test_bit(filter_idx, table->used_bitmap) && - spec->priority == priority && - !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { + spec->priority == priority) { efx_filter_table_clear_entry(efx, table, filter_idx); if (table->used == 0) efx_filter_table_reset_search_depth(table); @@ -833,8 +833,7 @@ int efx_filter_get_filter_safe(struct efx_nic *efx, spin_lock_bh(&state->lock); if (test_bit(filter_idx, table->used_bitmap) && - spec->priority == priority && - !((spec->flags ^ filter_flags) & EFX_FILTER_FLAG_RX_OVERRIDE_IP)) { + spec->priority == priority) { *spec_buf = *spec; rc = 0; } else { @@ -927,8 +926,7 @@ s32 efx_filter_get_rx_ids(struct efx_nic *efx, goto out; } buf[count++] = efx_filter_make_id( - table_id, filter_idx, - table->spec[filter_idx].flags); + &table->spec[filter_idx], filter_idx); } } } diff --git a/drivers/net/ethernet/sfc/filter.h b/drivers/net/ethernet/sfc/filter.h index 3c77802aed6..5cb54723b82 100644 --- a/drivers/net/ethernet/sfc/filter.h +++ b/drivers/net/ethernet/sfc/filter.h @@ -61,16 +61,12 @@ enum efx_filter_priority { * according to the indirection table. * @EFX_FILTER_FLAG_RX_SCATTER: Enable DMA scatter on the receiving * queue. - * @EFX_FILTER_FLAG_RX_OVERRIDE_IP: Enables a MAC filter to override - * any IP filter that matches the same packet. By default, IP - * filters take precedence. * @EFX_FILTER_FLAG_RX: Filter is for RX * @EFX_FILTER_FLAG_TX: Filter is for TX */ enum efx_filter_flags { EFX_FILTER_FLAG_RX_RSS = 0x01, EFX_FILTER_FLAG_RX_SCATTER = 0x02, - EFX_FILTER_FLAG_RX_OVERRIDE_IP = 0x04, EFX_FILTER_FLAG_RX = 0x08, EFX_FILTER_FLAG_TX = 0x10, }; @@ -88,8 +84,7 @@ enum efx_filter_flags { * * The @priority field is used by software to determine whether a new * filter may replace an old one. The hardware priority of a filter - * depends on the filter type and %EFX_FILTER_FLAG_RX_OVERRIDE_IP - * flag. + * depends on the filter type. */ struct efx_filter_spec { u8 type:4; diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index fc5e7bbcbc9..aea43cbd052 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -320,14 +320,20 @@ static void efx_mcdi_ev_cpl(struct efx_nic *efx, unsigned int seqno, efx_mcdi_complete(mcdi); } -/* Issue the given command by writing the data into the shared memory PDU, - * ring the doorbell and wait for completion. Copyout the result. */ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen, size_t *outlen_actual) { + efx_mcdi_rpc_start(efx, cmd, inbuf, inlen); + return efx_mcdi_rpc_finish(efx, cmd, inlen, + outbuf, outlen, outlen_actual); +} + +void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, + size_t inlen) +{ struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - int rc; + BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0); efx_mcdi_acquire(mcdi); @@ -338,6 +344,15 @@ int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, spin_unlock_bh(&mcdi->iface_lock); efx_mcdi_copyin(efx, cmd, inbuf, inlen); +} + +int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + u8 *outbuf, size_t outlen, size_t *outlen_actual) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + int rc; + + BUG_ON(efx_nic_rev(efx) < EFX_REV_SIENA_A0); if (mcdi->mode == MCDI_MODE_POLL) rc = efx_mcdi_poll(efx); @@ -563,6 +578,11 @@ void efx_mcdi_process_event(struct efx_channel *channel, case MCDI_EVENT_CODE_FLR: efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF)); break; + case MCDI_EVENT_CODE_PTP_RX: + case MCDI_EVENT_CODE_PTP_FAULT: + case MCDI_EVENT_CODE_PTP_PPS: + efx_ptp_event(efx, event); + break; default: netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n", @@ -641,9 +661,8 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, u16 *fw_subtype_list, u32 *capabilities) { uint8_t outbuf[MC_CMD_GET_BOARD_CFG_OUT_LENMIN]; - size_t outlen; + size_t outlen, offset, i; int port_num = efx_port_num(efx); - int offset; int rc; BUILD_BUG_ON(MC_CMD_GET_BOARD_CFG_IN_LEN != 0); @@ -663,11 +682,18 @@ int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address, : MC_CMD_GET_BOARD_CFG_OUT_MAC_ADDR_BASE_PORT0_OFST; if (mac_address) memcpy(mac_address, outbuf + offset, ETH_ALEN); - if (fw_subtype_list) - memcpy(fw_subtype_list, - outbuf + MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST, - MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM * - sizeof(fw_subtype_list[0])); + if (fw_subtype_list) { + /* Byte-swap and truncate or zero-pad as necessary */ + offset = MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_OFST; + for (i = 0; + i < MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM; + i++) { + fw_subtype_list[i] = + (offset + 2 <= outlen) ? + le16_to_cpup((__le16 *)(outbuf + offset)) : 0; + offset += 2; + } + } if (capabilities) { if (port_num) *capabilities = MCDI_DWORD(outbuf, @@ -1169,6 +1195,9 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx) __le32 *qid; int rc, count; + BUILD_BUG_ON(EFX_MAX_CHANNELS > + MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM); + qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL); if (qid == NULL) return -ENOMEM; diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 0bdf3e33183..3ba2e5b5a9c 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -71,6 +71,12 @@ extern int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const u8 *inbuf, size_t inlen, u8 *outbuf, size_t outlen, size_t *outlen_actual); +extern void efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, + const u8 *inbuf, size_t inlen); +extern int efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, + u8 *outbuf, size_t outlen, + size_t *outlen_actual); + extern int efx_mcdi_poll_reboot(struct efx_nic *efx); extern void efx_mcdi_mode_poll(struct efx_nic *efx); extern void efx_mcdi_mode_event(struct efx_nic *efx); @@ -107,11 +113,13 @@ extern void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); #define MCDI_EVENT_FIELD(_ev, _field) \ EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field) #define MCDI_ARRAY_FIELD(_buf, _field1, _type, _index, _field2) \ - EFX_DWORD_FIELD( \ + EFX_EXTRACT_DWORD( \ *((efx_dword_t *) \ (MCDI_ARRAY_PTR(_buf, _field1, _type, _index) + \ (MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _OFST & ~3))), \ - MC_CMD_ ## _type ## _TYPEDEF_ ## _field2) + MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f, \ + (MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _LBN & 0x1f) + \ + MC_CMD_ ## _type ## _TYPEDEF_ ## _field2 ## _WIDTH - 1) extern void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len); extern int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index db4beed9766..9d426d0457b 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -289,6 +289,7 @@ #define MCDI_EVENT_CODE_TX_FLUSH 0xc /* enum */ #define MCDI_EVENT_CODE_PTP_RX 0xd /* enum */ #define MCDI_EVENT_CODE_PTP_FAULT 0xe /* enum */ +#define MCDI_EVENT_CODE_PTP_PPS 0xf /* enum */ #define MCDI_EVENT_CMDDONE_DATA_OFST 0 #define MCDI_EVENT_CMDDONE_DATA_LBN 0 #define MCDI_EVENT_CMDDONE_DATA_WIDTH 32 @@ -491,12 +492,12 @@ /* MC_CMD_GET_FPGAREG_OUT msgresponse */ #define MC_CMD_GET_FPGAREG_OUT_LENMIN 1 -#define MC_CMD_GET_FPGAREG_OUT_LENMAX 255 +#define MC_CMD_GET_FPGAREG_OUT_LENMAX 252 #define MC_CMD_GET_FPGAREG_OUT_LEN(num) (0+1*(num)) #define MC_CMD_GET_FPGAREG_OUT_BUFFER_OFST 0 #define MC_CMD_GET_FPGAREG_OUT_BUFFER_LEN 1 #define MC_CMD_GET_FPGAREG_OUT_BUFFER_MINNUM 1 -#define MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 255 +#define MC_CMD_GET_FPGAREG_OUT_BUFFER_MAXNUM 252 /***********************************/ @@ -507,13 +508,13 @@ /* MC_CMD_PUT_FPGAREG_IN msgrequest */ #define MC_CMD_PUT_FPGAREG_IN_LENMIN 5 -#define MC_CMD_PUT_FPGAREG_IN_LENMAX 255 +#define MC_CMD_PUT_FPGAREG_IN_LENMAX 252 #define MC_CMD_PUT_FPGAREG_IN_LEN(num) (4+1*(num)) #define MC_CMD_PUT_FPGAREG_IN_ADDR_OFST 0 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_OFST 4 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_LEN 1 #define MC_CMD_PUT_FPGAREG_IN_BUFFER_MINNUM 1 -#define MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 251 +#define MC_CMD_PUT_FPGAREG_IN_BUFFER_MAXNUM 248 /* MC_CMD_PUT_FPGAREG_OUT msgresponse */ #define MC_CMD_PUT_FPGAREG_OUT_LEN 0 @@ -560,7 +561,7 @@ /* MC_CMD_PTP_IN_TRANSMIT msgrequest */ #define MC_CMD_PTP_IN_TRANSMIT_LENMIN 13 -#define MC_CMD_PTP_IN_TRANSMIT_LENMAX 255 +#define MC_CMD_PTP_IN_TRANSMIT_LENMAX 252 #define MC_CMD_PTP_IN_TRANSMIT_LEN(num) (12+1*(num)) /* MC_CMD_PTP_IN_CMD_OFST 0 */ /* MC_CMD_PTP_IN_PERIPH_ID_OFST 4 */ @@ -568,7 +569,7 @@ #define MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST 12 #define MC_CMD_PTP_IN_TRANSMIT_PACKET_LEN 1 #define MC_CMD_PTP_IN_TRANSMIT_PACKET_MINNUM 1 -#define MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 243 +#define MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM 240 /* MC_CMD_PTP_IN_READ_NIC_TIME msgrequest */ #define MC_CMD_PTP_IN_READ_NIC_TIME_LEN 8 @@ -1145,7 +1146,7 @@ /* MC_CMD_PUTS_IN msgrequest */ #define MC_CMD_PUTS_IN_LENMIN 13 -#define MC_CMD_PUTS_IN_LENMAX 255 +#define MC_CMD_PUTS_IN_LENMAX 252 #define MC_CMD_PUTS_IN_LEN(num) (12+1*(num)) #define MC_CMD_PUTS_IN_DEST_OFST 0 #define MC_CMD_PUTS_IN_UART_LBN 0 @@ -1157,7 +1158,7 @@ #define MC_CMD_PUTS_IN_STRING_OFST 12 #define MC_CMD_PUTS_IN_STRING_LEN 1 #define MC_CMD_PUTS_IN_STRING_MINNUM 1 -#define MC_CMD_PUTS_IN_STRING_MAXNUM 243 +#define MC_CMD_PUTS_IN_STRING_MAXNUM 240 /* MC_CMD_PUTS_OUT msgresponse */ #define MC_CMD_PUTS_OUT_LEN 0 @@ -1947,12 +1948,12 @@ /* MC_CMD_NVRAM_READ_OUT msgresponse */ #define MC_CMD_NVRAM_READ_OUT_LENMIN 1 -#define MC_CMD_NVRAM_READ_OUT_LENMAX 255 +#define MC_CMD_NVRAM_READ_OUT_LENMAX 252 #define MC_CMD_NVRAM_READ_OUT_LEN(num) (0+1*(num)) #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_OFST 0 #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_LEN 1 #define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MINNUM 1 -#define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 255 +#define MC_CMD_NVRAM_READ_OUT_READ_BUFFER_MAXNUM 252 /***********************************/ @@ -1963,7 +1964,7 @@ /* MC_CMD_NVRAM_WRITE_IN msgrequest */ #define MC_CMD_NVRAM_WRITE_IN_LENMIN 13 -#define MC_CMD_NVRAM_WRITE_IN_LENMAX 255 +#define MC_CMD_NVRAM_WRITE_IN_LENMAX 252 #define MC_CMD_NVRAM_WRITE_IN_LEN(num) (12+1*(num)) #define MC_CMD_NVRAM_WRITE_IN_TYPE_OFST 0 /* Enum values, see field(s): */ @@ -1973,7 +1974,7 @@ #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_OFST 12 #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_LEN 1 #define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MINNUM 1 -#define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 243 +#define MC_CMD_NVRAM_WRITE_IN_WRITE_BUFFER_MAXNUM 240 /* MC_CMD_NVRAM_WRITE_OUT msgresponse */ #define MC_CMD_NVRAM_WRITE_OUT_LEN 0 @@ -2305,13 +2306,13 @@ /* MC_CMD_GET_PHY_MEDIA_INFO_OUT msgresponse */ #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMIN 5 -#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 255 +#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LENMAX 252 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_LEN(num) (4+1*(num)) #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATALEN_OFST 0 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_OFST 4 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_LEN 1 #define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MINNUM 1 -#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 251 +#define MC_CMD_GET_PHY_MEDIA_INFO_OUT_DATA_MAXNUM 248 /***********************************/ diff --git a/drivers/net/ethernet/sfc/mtd.c b/drivers/net/ethernet/sfc/mtd.c index 758148379b0..08f825b71ac 100644 --- a/drivers/net/ethernet/sfc/mtd.c +++ b/drivers/net/ethernet/sfc/mtd.c @@ -585,6 +585,7 @@ static const struct siena_nvram_type_info siena_nvram_types[] = { [MC_CMD_NVRAM_TYPE_EXP_ROM_CFG_PORT1] = { 1, "sfc_exp_rom_cfg" }, [MC_CMD_NVRAM_TYPE_PHY_PORT0] = { 0, "sfc_phy_fw" }, [MC_CMD_NVRAM_TYPE_PHY_PORT1] = { 1, "sfc_phy_fw" }, + [MC_CMD_NVRAM_TYPE_FPGA] = { 0, "sfc_fpga" }, }; static int siena_mtd_probe_partition(struct efx_nic *efx, @@ -598,7 +599,8 @@ static int siena_mtd_probe_partition(struct efx_nic *efx, bool protected; int rc; - if (type >= ARRAY_SIZE(siena_nvram_types)) + if (type >= ARRAY_SIZE(siena_nvram_types) || + siena_nvram_types[type].name == NULL) return -ENODEV; info = &siena_nvram_types[type]; @@ -627,7 +629,8 @@ static int siena_mtd_get_fw_subtypes(struct efx_nic *efx, struct efx_mtd *efx_mtd) { struct efx_mtd_partition *part; - uint16_t fw_subtype_list[MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MINNUM]; + uint16_t fw_subtype_list[ + MC_CMD_GET_BOARD_CFG_OUT_FW_SUBTYPE_LIST_MAXNUM]; int rc; rc = efx_mcdi_get_board_cfg(efx, NULL, fw_subtype_list, NULL); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cd9c0a98969..c1a010cda89 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -37,7 +37,7 @@ * **************************************************************************/ -#define EFX_DRIVER_VERSION "3.1" +#define EFX_DRIVER_VERSION "3.2" #ifdef DEBUG #define EFX_BUG_ON_PARANOID(x) BUG_ON(x) @@ -56,7 +56,8 @@ #define EFX_MAX_CHANNELS 32U #define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS #define EFX_EXTRA_CHANNEL_IOV 0 -#define EFX_MAX_EXTRA_CHANNELS 1U +#define EFX_EXTRA_CHANNEL_PTP 1 +#define EFX_MAX_EXTRA_CHANNELS 2U /* Checksum generation is a per-queue option in hardware, so each * queue visible to the networking core is backed by two hardware TX @@ -68,6 +69,9 @@ #define EFX_TXQ_TYPES 4 #define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS) +/* Forward declare Precision Time Protocol (PTP) support structure. */ +struct efx_ptp_data; + struct efx_self_tests; /** @@ -91,29 +95,31 @@ struct efx_special_buffer { }; /** - * struct efx_tx_buffer - An Efx TX buffer - * @skb: The associated socket buffer. - * Set only on the final fragment of a packet; %NULL for all other - * fragments. When this fragment completes, then we can free this - * skb. - * @tsoh: The associated TSO header structure, or %NULL if this - * buffer is not a TSO header. + * struct efx_tx_buffer - buffer state for a TX descriptor + * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be + * freed when descriptor completes + * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be + * freed when descriptor completes. * @dma_addr: DMA address of the fragment. + * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. * This field is zero when the queue slot is empty. - * @continuation: True if this fragment is not the end of a packet. - * @unmap_single: True if dma_unmap_single should be used. * @unmap_len: Length of this fragment to unmap */ struct efx_tx_buffer { - const struct sk_buff *skb; - struct efx_tso_header *tsoh; + union { + const struct sk_buff *skb; + void *heap_buf; + }; dma_addr_t dma_addr; + unsigned short flags; unsigned short len; - bool continuation; - bool unmap_single; unsigned short unmap_len; }; +#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ +#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ +#define EFX_TX_BUF_HEAP 4 /* buffer was allocated with kmalloc() */ +#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ /** * struct efx_tx_queue - An Efx TX queue @@ -133,6 +139,7 @@ struct efx_tx_buffer { * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring + * @tsoh_page: Array of pages of TSO header buffers * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. * @initialised: Has hardware queue been initialised? @@ -156,9 +163,6 @@ struct efx_tx_buffer { * variable indicates that the queue is full. This is to * avoid cache-line ping-pong between the xmit path and the * completion path. - * @tso_headers_free: A list of TSO headers allocated for this TX queue - * that are not in use, and so available for new TSO sends. The list - * is protected by the TX queue lock. * @tso_bursts: Number of times TSO xmit invoked by kernel * @tso_long_headers: Number of packets with headers too long for standard * blocks @@ -175,6 +179,7 @@ struct efx_tx_queue { struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; + struct efx_buffer *tsoh_page; struct efx_special_buffer txd; unsigned int ptr_mask; bool initialised; @@ -187,7 +192,6 @@ struct efx_tx_queue { unsigned int insert_count ____cacheline_aligned_in_smp; unsigned int write_count; unsigned int old_read_count; - struct efx_tso_header *tso_headers_free; unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; @@ -242,6 +246,8 @@ struct efx_rx_page_state { /** * struct efx_rx_queue - An Efx RX queue * @efx: The associated Efx NIC + * @core_index: Index of network core RX queue. Will be >= 0 iff this + * is associated with a real RX queue. * @buffer: The software buffer ring * @rxd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. @@ -263,6 +269,7 @@ struct efx_rx_page_state { */ struct efx_rx_queue { struct efx_nic *efx; + int core_index; struct efx_rx_buffer *buffer; struct efx_special_buffer rxd; unsigned int ptr_mask; @@ -390,14 +397,17 @@ struct efx_channel { * @get_name: Generate the channel's name (used for its IRQ handler) * @copy: Copy the channel state prior to reallocation. May be %NULL if * reallocation is not supported. + * @receive_skb: Handle an skb ready to be passed to netif_receive_skb() * @keep_eventq: Flag for whether event queue should be kept initialised * while the device is stopped */ struct efx_channel_type { void (*handle_no_channel)(struct efx_nic *); int (*pre_probe)(struct efx_channel *); + void (*post_remove)(struct efx_channel *); void (*get_name)(struct efx_channel *, char *buf, size_t len); struct efx_channel *(*copy)(const struct efx_channel *); + void (*receive_skb)(struct efx_channel *, struct sk_buff *); bool keep_eventq; }; @@ -430,11 +440,9 @@ enum efx_int_mode { #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) enum nic_state { - STATE_INIT = 0, - STATE_RUNNING = 1, - STATE_FINI = 2, - STATE_DISABLED = 3, - STATE_MAX, + STATE_UNINIT = 0, /* device being probed/removed or is frozen */ + STATE_READY = 1, /* hardware ready and netdev registered */ + STATE_DISABLED = 2, /* device disabled due to hardware errors */ }; /* @@ -654,7 +662,7 @@ struct vfdi_status; * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags - * @state: Device state flag. Serialised by the rtnl_lock. + * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets * @tx_queue: TX DMA queues * @rx_queue: RX DMA queues @@ -664,6 +672,8 @@ struct vfdi_status; * should be allocated for this NIC * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. + * @txq_stop_thresh: TX queue fill level at or above which we stop it. + * @txq_wake_thresh: TX queue fill level at or below which we wake it. * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches * @sram_lim_qw: Qword address limit of SRAM @@ -730,6 +740,7 @@ struct vfdi_status; * %local_addr_list. Protected by %local_lock. * @local_lock: Mutex protecting %local_addr_list and %local_page_list. * @peer_work: Work item to broadcast peer addresses to VMs. + * @ptp_data: PTP state data * @monitor_work: Hardware monitor workitem * @biu_lock: BIU (bus interface unit) lock * @last_irq_cpu: Last CPU to handle a possible test interrupt. This @@ -774,6 +785,9 @@ struct efx_nic { unsigned rxq_entries; unsigned txq_entries; + unsigned int txq_stop_thresh; + unsigned int txq_wake_thresh; + unsigned tx_dc_base; unsigned rx_dc_base; unsigned sram_lim_qw; @@ -854,6 +868,10 @@ struct efx_nic { struct work_struct peer_work; #endif +#ifdef CONFIG_SFC_PTP + struct efx_ptp_data *ptp_data; +#endif + /* The following fields may be written more often */ struct delayed_work monitor_work ____cacheline_aligned_in_smp; @@ -1044,7 +1062,7 @@ static inline bool efx_tx_queue_used(struct efx_tx_queue *tx_queue) static inline bool efx_channel_has_rx_queue(struct efx_channel *channel) { - return channel->channel < channel->efx->n_rx_channels; + return channel->rx_queue.core_index >= 0; } static inline struct efx_rx_queue * @@ -1116,5 +1134,13 @@ static inline void clear_bit_le(unsigned nr, unsigned char *addr) #define EFX_MAX_FRAME_LEN(mtu) \ ((((mtu) + ETH_HLEN + VLAN_HLEN + 4/* FCS */ + 7) & ~7) + 16) +static inline bool efx_xmit_with_hwtstamp(struct sk_buff *skb) +{ + return skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; +} +static inline void efx_xmit_hwtstamp_pending(struct sk_buff *skb) +{ + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; +} #endif /* EFX_NET_DRIVER_H */ diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 326d799762d..cdff40b6572 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) /************************************************************************** * * Generic buffer handling - * These buffers are used for interrupt status and MAC stats + * These buffers are used for interrupt status, MAC stats, etc. * **************************************************************************/ @@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) ++tx_queue->write_count; /* Create TX descriptor ring entry */ + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); EFX_POPULATE_QWORD_4(*txd, - FSF_AZ_TX_KER_CONT, buffer->continuation, + FSF_AZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, FSF_AZ_TX_KER_BYTE_COUNT, buffer->len, FSF_AZ_TX_KER_BUF_REGION, 0, FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr); diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index bab5cd9f574..438cef11f72 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -11,6 +11,7 @@ #ifndef EFX_NIC_H #define EFX_NIC_H +#include <linux/net_tstamp.h> #include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "efx.h" @@ -250,6 +251,41 @@ extern int efx_sriov_get_vf_config(struct net_device *dev, int vf, extern int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf, bool spoofchk); +struct ethtool_ts_info; +#ifdef CONFIG_SFC_PTP +extern void efx_ptp_probe(struct efx_nic *efx); +extern int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd); +extern int efx_ptp_get_ts_info(struct net_device *net_dev, + struct ethtool_ts_info *ts_info); +extern bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +extern int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb); +extern void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev); +#else +static inline void efx_ptp_probe(struct efx_nic *efx) {} +static inline int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd) +{ + return -EOPNOTSUPP; +} +static inline int efx_ptp_get_ts_info(struct net_device *net_dev, + struct ethtool_ts_info *ts_info) +{ + ts_info->so_timestamping = (SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE); + ts_info->phc_index = -1; + + return 0; +} +static inline bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return false; +} +static inline int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return NETDEV_TX_OK; +} +static inline void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) {} +#endif + extern const struct efx_nic_type falcon_a1_nic_type; extern const struct efx_nic_type falcon_b0_nic_type; extern const struct efx_nic_type siena_a0_nic_type; diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c new file mode 100644 index 00000000000..5b3dd028ce8 --- /dev/null +++ b/drivers/net/ethernet/sfc/ptp.c @@ -0,0 +1,1484 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +/* Theory of operation: + * + * PTP support is assisted by firmware running on the MC, which provides + * the hardware timestamping capabilities. Both transmitted and received + * PTP event packets are queued onto internal queues for subsequent processing; + * this is because the MC operations are relatively long and would block + * block NAPI/interrupt operation. + * + * Receive event processing: + * The event contains the packet's UUID and sequence number, together + * with the hardware timestamp. The PTP receive packet queue is searched + * for this UUID/sequence number and, if found, put on a pending queue. + * Packets not matching are delivered without timestamps (MCDI events will + * always arrive after the actual packet). + * It is important for the operation of the PTP protocol that the ordering + * of packets between the event and general port is maintained. + * + * Work queue processing: + * If work waiting, synchronise host/hardware time + * + * Transmit: send packet through MC, which returns the transmission time + * that is converted to an appropriate timestamp. + * + * Receive: the packet's reception time is converted to an appropriate + * timestamp. + */ +#include <linux/ip.h> +#include <linux/udp.h> +#include <linux/time.h> +#include <linux/ktime.h> +#include <linux/module.h> +#include <linux/net_tstamp.h> +#include <linux/pps_kernel.h> +#include <linux/ptp_clock_kernel.h> +#include "net_driver.h" +#include "efx.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "io.h" +#include "regs.h" +#include "nic.h" + +/* Maximum number of events expected to make up a PTP event */ +#define MAX_EVENT_FRAGS 3 + +/* Maximum delay, ms, to begin synchronisation */ +#define MAX_SYNCHRONISE_WAIT_MS 2 + +/* How long, at most, to spend synchronising */ +#define SYNCHRONISE_PERIOD_NS 250000 + +/* How often to update the shared memory time */ +#define SYNCHRONISATION_GRANULARITY_NS 200 + +/* Minimum permitted length of a (corrected) synchronisation time */ +#define MIN_SYNCHRONISATION_NS 120 + +/* Maximum permitted length of a (corrected) synchronisation time */ +#define MAX_SYNCHRONISATION_NS 1000 + +/* How many (MC) receive events that can be queued */ +#define MAX_RECEIVE_EVENTS 8 + +/* Length of (modified) moving average. */ +#define AVERAGE_LENGTH 16 + +/* How long an unmatched event or packet can be held */ +#define PKT_EVENT_LIFETIME_MS 10 + +/* Offsets into PTP packet for identification. These offsets are from the + * start of the IP header, not the MAC header. Note that neither PTP V1 nor + * PTP V2 permit the use of IPV4 options. + */ +#define PTP_DPORT_OFFSET 22 + +#define PTP_V1_VERSION_LENGTH 2 +#define PTP_V1_VERSION_OFFSET 28 + +#define PTP_V1_UUID_LENGTH 6 +#define PTP_V1_UUID_OFFSET 50 + +#define PTP_V1_SEQUENCE_LENGTH 2 +#define PTP_V1_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V1 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V1_MIN_LENGTH 64 + +#define PTP_V2_VERSION_LENGTH 1 +#define PTP_V2_VERSION_OFFSET 29 + +/* Although PTP V2 UUIDs are comprised a ClockIdentity (8) and PortNumber (2), + * the MC only captures the last six bytes of the clock identity. These values + * reflect those, not the ones used in the standard. The standard permits + * mapping of V1 UUIDs to V2 UUIDs with these same values. + */ +#define PTP_V2_MC_UUID_LENGTH 6 +#define PTP_V2_MC_UUID_OFFSET 50 + +#define PTP_V2_SEQUENCE_LENGTH 2 +#define PTP_V2_SEQUENCE_OFFSET 58 + +/* The minimum length of a PTP V2 packet for offsets, etc. to be valid: + * includes IP header. + */ +#define PTP_V2_MIN_LENGTH 63 + +#define PTP_MIN_LENGTH 63 + +#define PTP_ADDRESS 0xe0000181 /* 224.0.1.129 */ +#define PTP_EVENT_PORT 319 +#define PTP_GENERAL_PORT 320 + +/* Annoyingly the format of the version numbers are different between + * versions 1 and 2 so it isn't possible to simply look for 1 or 2. + */ +#define PTP_VERSION_V1 1 + +#define PTP_VERSION_V2 2 +#define PTP_VERSION_V2_MASK 0x0f + +enum ptp_packet_state { + PTP_PACKET_STATE_UNMATCHED = 0, + PTP_PACKET_STATE_MATCHED, + PTP_PACKET_STATE_TIMED_OUT, + PTP_PACKET_STATE_MATCH_UNWANTED +}; + +/* NIC synchronised with single word of time only comprising + * partial seconds and full nanoseconds: 10^9 ~ 2^30 so 2 bits for seconds. + */ +#define MC_NANOSECOND_BITS 30 +#define MC_NANOSECOND_MASK ((1 << MC_NANOSECOND_BITS) - 1) +#define MC_SECOND_MASK ((1 << (32 - MC_NANOSECOND_BITS)) - 1) + +/* Maximum parts-per-billion adjustment that is acceptable */ +#define MAX_PPB 1000000 + +/* Number of bits required to hold the above */ +#define MAX_PPB_BITS 20 + +/* Number of extra bits allowed when calculating fractional ns. + * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should + * be less than 63. + */ +#define PPB_EXTRA_BITS 2 + +/* Precalculate scale word to avoid long long division at runtime */ +#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\ + MAX_PPB_BITS)) / 1000000000LL) + +#define PTP_SYNC_ATTEMPTS 4 + +/** + * struct efx_ptp_match - Matching structure, stored in sk_buff's cb area. + * @words: UUID and (partial) sequence number + * @expiry: Time after which the packet should be delivered irrespective of + * event arrival. + * @state: The state of the packet - whether it is ready for processing or + * whether that is of no interest. + */ +struct efx_ptp_match { + u32 words[DIV_ROUND_UP(PTP_V1_UUID_LENGTH, 4)]; + unsigned long expiry; + enum ptp_packet_state state; +}; + +/** + * struct efx_ptp_event_rx - A PTP receive event (from MC) + * @seq0: First part of (PTP) UUID + * @seq1: Second part of (PTP) UUID and sequence number + * @hwtimestamp: Event timestamp + */ +struct efx_ptp_event_rx { + struct list_head link; + u32 seq0; + u32 seq1; + ktime_t hwtimestamp; + unsigned long expiry; +}; + +/** + * struct efx_ptp_timeset - Synchronisation between host and MC + * @host_start: Host time immediately before hardware timestamp taken + * @seconds: Hardware timestamp, seconds + * @nanoseconds: Hardware timestamp, nanoseconds + * @host_end: Host time immediately after hardware timestamp taken + * @waitns: Number of nanoseconds between hardware timestamp being read and + * host end time being seen + * @window: Difference of host_end and host_start + * @valid: Whether this timeset is valid + */ +struct efx_ptp_timeset { + u32 host_start; + u32 seconds; + u32 nanoseconds; + u32 host_end; + u32 waitns; + u32 window; /* Derived: end - start, allowing for wrap */ +}; + +/** + * struct efx_ptp_data - Precision Time Protocol (PTP) state + * @channel: The PTP channel + * @rxq: Receive queue (awaiting timestamps) + * @txq: Transmit queue + * @evt_list: List of MC receive events awaiting packets + * @evt_free_list: List of free events + * @evt_lock: Lock for manipulating evt_list and evt_free_list + * @rx_evts: Instantiated events (on evt_list and evt_free_list) + * @workwq: Work queue for processing pending PTP operations + * @work: Work task + * @reset_required: A serious error has occurred and the PTP task needs to be + * reset (disable, enable). + * @rxfilter_event: Receive filter when operating + * @rxfilter_general: Receive filter when operating + * @config: Current timestamp configuration + * @enabled: PTP operation enabled + * @mode: Mode in which PTP operating (PTP version) + * @evt_frags: Partly assembled PTP events + * @evt_frag_idx: Current fragment number + * @evt_code: Last event code + * @start: Address at which MC indicates ready for synchronisation + * @host_time_pps: Host time at last PPS + * @last_sync_ns: Last number of nanoseconds between readings when synchronising + * @base_sync_ns: Number of nanoseconds for last synchronisation. + * @base_sync_valid: Whether base_sync_time is valid. + * @current_adjfreq: Current ppb adjustment. + * @phc_clock: Pointer to registered phc device + * @phc_clock_info: Registration structure for phc device + * @pps_work: pps work task for handling pps events + * @pps_workwq: pps work queue + * @nic_ts_enabled: Flag indicating if NIC generated TS events are handled + * @txbuf: Buffer for use when transmitting (PTP) packets to MC (avoids + * allocations in main data path). + * @debug_ptp_dir: PTP debugfs directory + * @missed_rx_sync: Number of packets received without syncrhonisation. + * @good_syncs: Number of successful synchronisations. + * @no_time_syncs: Number of synchronisations with no good times. + * @bad_sync_durations: Number of synchronisations with bad durations. + * @bad_syncs: Number of failed synchronisations. + * @last_sync_time: Number of nanoseconds for last synchronisation. + * @sync_timeouts: Number of synchronisation timeouts + * @fast_syncs: Number of synchronisations requiring short delay + * @min_sync_delta: Minimum time between event and synchronisation + * @max_sync_delta: Maximum time between event and synchronisation + * @average_sync_delta: Average time between event and synchronisation. + * Modified moving average. + * @last_sync_delta: Last time between event and synchronisation + * @mc_stats: Context value for MC statistics + * @timeset: Last set of synchronisation statistics. + */ +struct efx_ptp_data { + struct efx_channel *channel; + struct sk_buff_head rxq; + struct sk_buff_head txq; + struct list_head evt_list; + struct list_head evt_free_list; + spinlock_t evt_lock; + struct efx_ptp_event_rx rx_evts[MAX_RECEIVE_EVENTS]; + struct workqueue_struct *workwq; + struct work_struct work; + bool reset_required; + u32 rxfilter_event; + u32 rxfilter_general; + bool rxfilter_installed; + struct hwtstamp_config config; + bool enabled; + unsigned int mode; + efx_qword_t evt_frags[MAX_EVENT_FRAGS]; + int evt_frag_idx; + int evt_code; + struct efx_buffer start; + struct pps_event_time host_time_pps; + unsigned last_sync_ns; + unsigned base_sync_ns; + bool base_sync_valid; + s64 current_adjfreq; + struct ptp_clock *phc_clock; + struct ptp_clock_info phc_clock_info; + struct work_struct pps_work; + struct workqueue_struct *pps_workwq; + bool nic_ts_enabled; + u8 txbuf[ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN( + MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM), 4)]; + struct efx_ptp_timeset + timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM]; +}; + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta); +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta); +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts); +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec *e_ts); +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, int on); + +/* Enable MCDI PTP support. */ +static int efx_ptp_enable(struct efx_nic *efx) +{ + u8 inbuf[MC_CMD_PTP_IN_ENABLE_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ENABLE); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_QUEUE, + efx->ptp_data->channel->channel); + MCDI_SET_DWORD(inbuf, PTP_IN_ENABLE_MODE, efx->ptp_data->mode); + + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +/* Disable MCDI PTP support. + * + * Note that this function should never rely on the presence of ptp_data - + * may be called before that exists. + */ +static int efx_ptp_disable(struct efx_nic *efx) +{ + u8 inbuf[MC_CMD_PTP_IN_DISABLE_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_DISABLE); + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static void efx_ptp_deliver_rx_queue(struct sk_buff_head *q) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(q))) { + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); + } +} + +static void efx_ptp_handle_no_channel(struct efx_nic *efx) +{ + netif_err(efx, drv, efx->net_dev, + "ERROR: PTP requires MSI-X and 1 additional interrupt" + "vector. PTP disabled\n"); +} + +/* Repeatedly send the host time to the MC which will capture the hardware + * time. + */ +static void efx_ptp_send_times(struct efx_nic *efx, + struct pps_event_time *last_time) +{ + struct pps_event_time now; + struct timespec limit; + struct efx_ptp_data *ptp = efx->ptp_data; + struct timespec start; + int *mc_running = ptp->start.addr; + + pps_get_ts(&now); + start = now.ts_real; + limit = now.ts_real; + timespec_add_ns(&limit, SYNCHRONISE_PERIOD_NS); + + /* Write host time for specified period or until MC is done */ + while ((timespec_compare(&now.ts_real, &limit) < 0) && + ACCESS_ONCE(*mc_running)) { + struct timespec update_time; + unsigned int host_time; + + /* Don't update continuously to avoid saturating the PCIe bus */ + update_time = now.ts_real; + timespec_add_ns(&update_time, SYNCHRONISATION_GRANULARITY_NS); + do { + pps_get_ts(&now); + } while ((timespec_compare(&now.ts_real, &update_time) < 0) && + ACCESS_ONCE(*mc_running)); + + /* Synchronise NIC with single word of time only */ + host_time = (now.ts_real.tv_sec << MC_NANOSECOND_BITS | + now.ts_real.tv_nsec); + /* Update host time in NIC memory */ + _efx_writed(efx, cpu_to_le32(host_time), + FR_CZ_MC_TREG_SMEM + MC_SMEM_P0_PTP_TIME_OFST); + } + *last_time = now; +} + +/* Read a timeset from the MC's results and partial process. */ +static void efx_ptp_read_timeset(u8 *data, struct efx_ptp_timeset *timeset) +{ + unsigned start_ns, end_ns; + + timeset->host_start = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTSTART); + timeset->seconds = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_SECONDS); + timeset->nanoseconds = MCDI_DWORD(data, + PTP_OUT_SYNCHRONIZE_NANOSECONDS); + timeset->host_end = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_HOSTEND), + timeset->waitns = MCDI_DWORD(data, PTP_OUT_SYNCHRONIZE_WAITNS); + + /* Ignore seconds */ + start_ns = timeset->host_start & MC_NANOSECOND_MASK; + end_ns = timeset->host_end & MC_NANOSECOND_MASK; + /* Allow for rollover */ + if (end_ns < start_ns) + end_ns += NSEC_PER_SEC; + /* Determine duration of operation */ + timeset->window = end_ns - start_ns; +} + +/* Process times received from MC. + * + * Extract times from returned results, and establish the minimum value + * seen. The minimum value represents the "best" possible time and events + * too much greater than this are rejected - the machine is, perhaps, too + * busy. A number of readings are taken so that, hopefully, at least one good + * synchronisation will be seen in the results. + */ +static int efx_ptp_process_times(struct efx_nic *efx, u8 *synch_buf, + size_t response_length, + const struct pps_event_time *last_time) +{ + unsigned number_readings = (response_length / + MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN); + unsigned i; + unsigned min; + unsigned min_set = 0; + unsigned total; + unsigned ngood = 0; + unsigned last_good = 0; + struct efx_ptp_data *ptp = efx->ptp_data; + bool min_valid = false; + u32 last_sec; + u32 start_sec; + struct timespec delta; + + if (number_readings == 0) + return -EAGAIN; + + /* Find minimum value in this set of results, discarding clearly + * erroneous results. + */ + for (i = 0; i < number_readings; i++) { + efx_ptp_read_timeset(synch_buf, &ptp->timeset[i]); + synch_buf += MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_LEN; + if (ptp->timeset[i].window > SYNCHRONISATION_GRANULARITY_NS) { + if (min_valid) { + if (ptp->timeset[i].window < min_set) + min_set = ptp->timeset[i].window; + } else { + min_valid = true; + min_set = ptp->timeset[i].window; + } + } + } + + if (min_valid) { + if (ptp->base_sync_valid && (min_set > ptp->base_sync_ns)) + min = ptp->base_sync_ns; + else + min = min_set; + } else { + min = SYNCHRONISATION_GRANULARITY_NS; + } + + /* Discard excessively long synchronise durations. The MC times + * when it finishes reading the host time so the corrected window + * time should be fairly constant for a given platform. + */ + total = 0; + for (i = 0; i < number_readings; i++) + if (ptp->timeset[i].window > ptp->timeset[i].waitns) { + unsigned win; + + win = ptp->timeset[i].window - ptp->timeset[i].waitns; + if (win >= MIN_SYNCHRONISATION_NS && + win < MAX_SYNCHRONISATION_NS) { + total += ptp->timeset[i].window; + ngood++; + last_good = i; + } + } + + if (ngood == 0) { + netif_warn(efx, drv, efx->net_dev, + "PTP no suitable synchronisations %dns %dns\n", + ptp->base_sync_ns, min_set); + return -EAGAIN; + } + + /* Average minimum this synchronisation */ + ptp->last_sync_ns = DIV_ROUND_UP(total, ngood); + if (!ptp->base_sync_valid || (ptp->last_sync_ns < ptp->base_sync_ns)) { + ptp->base_sync_valid = true; + ptp->base_sync_ns = ptp->last_sync_ns; + } + + /* Calculate delay from actual PPS to last_time */ + delta.tv_nsec = + ptp->timeset[last_good].nanoseconds + + last_time->ts_real.tv_nsec - + (ptp->timeset[last_good].host_start & MC_NANOSECOND_MASK); + + /* It is possible that the seconds rolled over between taking + * the start reading and the last value written by the host. The + * timescales are such that a gap of more than one second is never + * expected. + */ + start_sec = ptp->timeset[last_good].host_start >> MC_NANOSECOND_BITS; + last_sec = last_time->ts_real.tv_sec & MC_SECOND_MASK; + if (start_sec != last_sec) { + if (((start_sec + 1) & MC_SECOND_MASK) != last_sec) { + netif_warn(efx, hw, efx->net_dev, + "PTP bad synchronisation seconds\n"); + return -EAGAIN; + } else { + delta.tv_sec = 1; + } + } else { + delta.tv_sec = 0; + } + + ptp->host_time_pps = *last_time; + pps_sub_ts(&ptp->host_time_pps, delta); + + return 0; +} + +/* Synchronize times between the host and the MC */ +static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + u8 synch_buf[MC_CMD_PTP_OUT_SYNCHRONIZE_LENMAX]; + size_t response_length; + int rc; + unsigned long timeout; + struct pps_event_time last_time = {}; + unsigned int loops = 0; + int *start = ptp->start.addr; + + MCDI_SET_DWORD(synch_buf, PTP_IN_OP, MC_CMD_PTP_OP_SYNCHRONIZE); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_NUMTIMESETS, + num_readings); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_LO, + (u32)ptp->start.dma_addr); + MCDI_SET_DWORD(synch_buf, PTP_IN_SYNCHRONIZE_START_ADDR_HI, + (u32)((u64)ptp->start.dma_addr >> 32)); + + /* Clear flag that signals MC ready */ + ACCESS_ONCE(*start) = 0; + efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN); + + /* Wait for start from MCDI (or timeout) */ + timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS); + while (!ACCESS_ONCE(*start) && (time_before(jiffies, timeout))) { + udelay(20); /* Usually start MCDI execution quickly */ + loops++; + } + + if (ACCESS_ONCE(*start)) + efx_ptp_send_times(efx, &last_time); + + /* Collect results */ + rc = efx_mcdi_rpc_finish(efx, MC_CMD_PTP, + MC_CMD_PTP_IN_SYNCHRONIZE_LEN, + synch_buf, sizeof(synch_buf), + &response_length); + if (rc == 0) + rc = efx_ptp_process_times(efx, synch_buf, response_length, + &last_time); + + return rc; +} + +/* Transmit a PTP packet, via the MCDI interface, to the wire. */ +static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb) +{ + u8 *txbuf = efx->ptp_data->txbuf; + struct skb_shared_hwtstamps timestamps; + int rc = -EIO; + /* MCDI driver requires word aligned lengths */ + size_t len = ALIGN(MC_CMD_PTP_IN_TRANSMIT_LEN(skb->len), 4); + u8 txtime[MC_CMD_PTP_OUT_TRANSMIT_LEN]; + + MCDI_SET_DWORD(txbuf, PTP_IN_OP, MC_CMD_PTP_OP_TRANSMIT); + MCDI_SET_DWORD(txbuf, PTP_IN_TRANSMIT_LENGTH, skb->len); + if (skb_shinfo(skb)->nr_frags != 0) { + rc = skb_linearize(skb); + if (rc != 0) + goto fail; + } + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + rc = skb_checksum_help(skb); + if (rc != 0) + goto fail; + } + skb_copy_from_linear_data(skb, + &txbuf[MC_CMD_PTP_IN_TRANSMIT_PACKET_OFST], + len); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, txbuf, len, txtime, + sizeof(txtime), &len); + if (rc != 0) + goto fail; + + memset(×tamps, 0, sizeof(timestamps)); + timestamps.hwtstamp = ktime_set( + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_SECONDS), + MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_NANOSECONDS)); + + skb_tstamp_tx(skb, ×tamps); + + rc = 0; + +fail: + dev_kfree_skb(skb); + + return rc; +} + +static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct list_head *cursor; + struct list_head *next; + + /* Drop time-expired events */ + spin_lock_bh(&ptp->evt_lock); + if (!list_empty(&ptp->evt_list)) { + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, + link); + if (time_after(jiffies, evt->expiry)) { + list_del(&evt->link); + list_add(&evt->link, &ptp->evt_free_list); + netif_warn(efx, hw, efx->net_dev, + "PTP rx event dropped\n"); + } + } + } + spin_unlock_bh(&ptp->evt_lock); +} + +static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx, + struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + bool evts_waiting; + struct list_head *cursor; + struct list_head *next; + struct efx_ptp_match *match; + enum ptp_packet_state rc = PTP_PACKET_STATE_UNMATCHED; + + spin_lock_bh(&ptp->evt_lock); + evts_waiting = !list_empty(&ptp->evt_list); + spin_unlock_bh(&ptp->evt_lock); + + if (!evts_waiting) + return PTP_PACKET_STATE_UNMATCHED; + + match = (struct efx_ptp_match *)skb->cb; + /* Look for a matching timestamp in the event queue */ + spin_lock_bh(&ptp->evt_lock); + list_for_each_safe(cursor, next, &ptp->evt_list) { + struct efx_ptp_event_rx *evt; + + evt = list_entry(cursor, struct efx_ptp_event_rx, link); + if ((evt->seq0 == match->words[0]) && + (evt->seq1 == match->words[1])) { + struct skb_shared_hwtstamps *timestamps; + + /* Match - add in hardware timestamp */ + timestamps = skb_hwtstamps(skb); + timestamps->hwtstamp = evt->hwtimestamp; + + match->state = PTP_PACKET_STATE_MATCHED; + rc = PTP_PACKET_STATE_MATCHED; + list_del(&evt->link); + list_add(&evt->link, &ptp->evt_free_list); + break; + } + } + spin_unlock_bh(&ptp->evt_lock); + + return rc; +} + +/* Process any queued receive events and corresponding packets + * + * q is returned with all the packets that are ready for delivery. + * true is returned if at least one of those packets requires + * synchronisation. + */ +static bool efx_ptp_process_events(struct efx_nic *efx, struct sk_buff_head *q) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + bool rc = false; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&ptp->rxq))) { + struct efx_ptp_match *match; + + match = (struct efx_ptp_match *)skb->cb; + if (match->state == PTP_PACKET_STATE_MATCH_UNWANTED) { + __skb_queue_tail(q, skb); + } else if (efx_ptp_match_rx(efx, skb) == + PTP_PACKET_STATE_MATCHED) { + rc = true; + __skb_queue_tail(q, skb); + } else if (time_after(jiffies, match->expiry)) { + match->state = PTP_PACKET_STATE_TIMED_OUT; + netif_warn(efx, rx_err, efx->net_dev, + "PTP packet - no timestamp seen\n"); + __skb_queue_tail(q, skb); + } else { + /* Replace unprocessed entry and stop */ + skb_queue_head(&ptp->rxq, skb); + break; + } + } + + return rc; +} + +/* Complete processing of a received packet */ +static inline void efx_ptp_process_rx(struct efx_nic *efx, struct sk_buff *skb) +{ + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); +} + +static int efx_ptp_start(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_filter_spec rxfilter; + int rc; + + ptp->reset_required = false; + + /* Must filter on both event and general ports to ensure + * that there is no packet re-ordering. + */ + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_EVENT_PORT)); + if (rc != 0) + return rc; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + return rc; + ptp->rxfilter_event = rc; + + efx_filter_init_rx(&rxfilter, EFX_FILTER_PRI_REQUIRED, 0, + efx_rx_queue_index( + efx_channel_get_rx_queue(ptp->channel))); + rc = efx_filter_set_ipv4_local(&rxfilter, IPPROTO_UDP, + htonl(PTP_ADDRESS), + htons(PTP_GENERAL_PORT)); + if (rc != 0) + goto fail; + + rc = efx_filter_insert_filter(efx, &rxfilter, true); + if (rc < 0) + goto fail; + ptp->rxfilter_general = rc; + + rc = efx_ptp_enable(efx); + if (rc != 0) + goto fail2; + + ptp->evt_frag_idx = 0; + ptp->current_adjfreq = 0; + ptp->rxfilter_installed = true; + + return 0; + +fail2: + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_general); +fail: + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + + return rc; +} + +static int efx_ptp_stop(struct efx_nic *efx) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int rc = efx_ptp_disable(efx); + struct list_head *cursor; + struct list_head *next; + + if (ptp->rxfilter_installed) { + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_general); + efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, + ptp->rxfilter_event); + ptp->rxfilter_installed = false; + } + + /* Make sure RX packets are really delivered */ + efx_ptp_deliver_rx_queue(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + /* Drop any pending receive events */ + spin_lock_bh(&efx->ptp_data->evt_lock); + list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) { + list_del(cursor); + list_add(cursor, &efx->ptp_data->evt_free_list); + } + spin_unlock_bh(&efx->ptp_data->evt_lock); + + return rc; +} + +static void efx_ptp_pps_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp = + container_of(work, struct efx_ptp_data, pps_work); + struct efx_nic *efx = ptp->channel->efx; + struct ptp_clock_event ptp_evt; + + if (efx_ptp_synchronize(efx, PTP_SYNC_ATTEMPTS)) + return; + + ptp_evt.type = PTP_CLOCK_PPSUSR; + ptp_evt.pps_times = ptp->host_time_pps; + ptp_clock_event(ptp->phc_clock, &ptp_evt); +} + +/* Process any pending transmissions and timestamp any received packets. + */ +static void efx_ptp_worker(struct work_struct *work) +{ + struct efx_ptp_data *ptp_data = + container_of(work, struct efx_ptp_data, work); + struct efx_nic *efx = ptp_data->channel->efx; + struct sk_buff *skb; + struct sk_buff_head tempq; + + if (ptp_data->reset_required) { + efx_ptp_stop(efx); + efx_ptp_start(efx); + return; + } + + efx_ptp_drop_time_expired_events(efx); + + __skb_queue_head_init(&tempq); + if (efx_ptp_process_events(efx, &tempq) || + !skb_queue_empty(&ptp_data->txq)) { + + while ((skb = skb_dequeue(&ptp_data->txq))) + efx_ptp_xmit_skb(efx, skb); + } + + while ((skb = __skb_dequeue(&tempq))) + efx_ptp_process_rx(efx, skb); +} + +/* Initialise PTP channel and state. + * + * Setting core_index to zero causes the queue to be initialised and doesn't + * overlap with 'rxq0' because ptp.c doesn't use skb_record_rx_queue. + */ +static int efx_ptp_probe_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp; + int rc = 0; + unsigned int pos; + + channel->irq_moderation = 0; + channel->rx_queue.core_index = 0; + + ptp = kzalloc(sizeof(struct efx_ptp_data), GFP_KERNEL); + efx->ptp_data = ptp; + if (!efx->ptp_data) + return -ENOMEM; + + rc = efx_nic_alloc_buffer(efx, &ptp->start, sizeof(int)); + if (rc != 0) + goto fail1; + + ptp->channel = channel; + skb_queue_head_init(&ptp->rxq); + skb_queue_head_init(&ptp->txq); + ptp->workwq = create_singlethread_workqueue("sfc_ptp"); + if (!ptp->workwq) { + rc = -ENOMEM; + goto fail2; + } + + INIT_WORK(&ptp->work, efx_ptp_worker); + ptp->config.flags = 0; + ptp->config.tx_type = HWTSTAMP_TX_OFF; + ptp->config.rx_filter = HWTSTAMP_FILTER_NONE; + INIT_LIST_HEAD(&ptp->evt_list); + INIT_LIST_HEAD(&ptp->evt_free_list); + spin_lock_init(&ptp->evt_lock); + for (pos = 0; pos < MAX_RECEIVE_EVENTS; pos++) + list_add(&ptp->rx_evts[pos].link, &ptp->evt_free_list); + + ptp->phc_clock_info.owner = THIS_MODULE; + snprintf(ptp->phc_clock_info.name, + sizeof(ptp->phc_clock_info.name), + "%pm", efx->net_dev->perm_addr); + ptp->phc_clock_info.max_adj = MAX_PPB; + ptp->phc_clock_info.n_alarm = 0; + ptp->phc_clock_info.n_ext_ts = 0; + ptp->phc_clock_info.n_per_out = 0; + ptp->phc_clock_info.pps = 1; + ptp->phc_clock_info.adjfreq = efx_phc_adjfreq; + ptp->phc_clock_info.adjtime = efx_phc_adjtime; + ptp->phc_clock_info.gettime = efx_phc_gettime; + ptp->phc_clock_info.settime = efx_phc_settime; + ptp->phc_clock_info.enable = efx_phc_enable; + + ptp->phc_clock = ptp_clock_register(&ptp->phc_clock_info, + &efx->pci_dev->dev); + if (!ptp->phc_clock) + goto fail3; + + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } + ptp->nic_ts_enabled = false; + + return 0; +fail4: + ptp_clock_unregister(efx->ptp_data->phc_clock); + +fail3: + destroy_workqueue(efx->ptp_data->workwq); + +fail2: + efx_nic_free_buffer(efx, &ptp->start); + +fail1: + kfree(efx->ptp_data); + efx->ptp_data = NULL; + + return rc; +} + +static void efx_ptp_remove_channel(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + if (!efx->ptp_data) + return; + + (void)efx_ptp_disable(channel->efx); + + cancel_work_sync(&efx->ptp_data->work); + cancel_work_sync(&efx->ptp_data->pps_work); + + skb_queue_purge(&efx->ptp_data->rxq); + skb_queue_purge(&efx->ptp_data->txq); + + ptp_clock_unregister(efx->ptp_data->phc_clock); + + destroy_workqueue(efx->ptp_data->workwq); + destroy_workqueue(efx->ptp_data->pps_workwq); + + efx_nic_free_buffer(efx, &efx->ptp_data->start); + kfree(efx->ptp_data); +} + +static void efx_ptp_get_channel_name(struct efx_channel *channel, + char *buf, size_t len) +{ + snprintf(buf, len, "%s-ptp", channel->efx->name); +} + +/* Determine whether this packet should be processed by the PTP module + * or transmitted conventionally. + */ +bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + return efx->ptp_data && + efx->ptp_data->enabled && + skb->len >= PTP_MIN_LENGTH && + skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM && + likely(skb->protocol == htons(ETH_P_IP)) && + ip_hdr(skb)->protocol == IPPROTO_UDP && + udp_hdr(skb)->dest == htons(PTP_EVENT_PORT); +} + +/* Receive a PTP packet. Packets are queued until the arrival of + * the receive timestamp from the MC - this will probably occur after the + * packet arrival because of the processing in the MC. + */ +static void efx_ptp_rx(struct efx_channel *channel, struct sk_buff *skb) +{ + struct efx_nic *efx = channel->efx; + struct efx_ptp_data *ptp = efx->ptp_data; + struct efx_ptp_match *match = (struct efx_ptp_match *)skb->cb; + u8 *data; + unsigned int version; + + match->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + + /* Correct version? */ + if (ptp->mode == MC_CMD_PTP_MODE_V1) { + if (skb->len < PTP_V1_MIN_LENGTH) { + netif_receive_skb(skb); + return; + } + version = ntohs(*(__be16 *)&skb->data[PTP_V1_VERSION_OFFSET]); + if (version != PTP_VERSION_V1) { + netif_receive_skb(skb); + return; + } + } else { + if (skb->len < PTP_V2_MIN_LENGTH) { + netif_receive_skb(skb); + return; + } + version = skb->data[PTP_V2_VERSION_OFFSET]; + + BUG_ON(ptp->mode != MC_CMD_PTP_MODE_V2); + BUILD_BUG_ON(PTP_V1_UUID_OFFSET != PTP_V2_MC_UUID_OFFSET); + BUILD_BUG_ON(PTP_V1_UUID_LENGTH != PTP_V2_MC_UUID_LENGTH); + BUILD_BUG_ON(PTP_V1_SEQUENCE_OFFSET != PTP_V2_SEQUENCE_OFFSET); + BUILD_BUG_ON(PTP_V1_SEQUENCE_LENGTH != PTP_V2_SEQUENCE_LENGTH); + + if ((version & PTP_VERSION_V2_MASK) != PTP_VERSION_V2) { + netif_receive_skb(skb); + return; + } + } + + /* Does this packet require timestamping? */ + if (ntohs(*(__be16 *)&skb->data[PTP_DPORT_OFFSET]) == PTP_EVENT_PORT) { + struct skb_shared_hwtstamps *timestamps; + + match->state = PTP_PACKET_STATE_UNMATCHED; + + /* Clear all timestamps held: filled in later */ + timestamps = skb_hwtstamps(skb); + memset(timestamps, 0, sizeof(*timestamps)); + + /* Extract UUID/Sequence information */ + data = skb->data + PTP_V1_UUID_OFFSET; + match->words[0] = (data[0] | + (data[1] << 8) | + (data[2] << 16) | + (data[3] << 24)); + match->words[1] = (data[4] | + (data[5] << 8) | + (skb->data[PTP_V1_SEQUENCE_OFFSET + + PTP_V1_SEQUENCE_LENGTH - 1] << + 16)); + } else { + match->state = PTP_PACKET_STATE_MATCH_UNWANTED; + } + + skb_queue_tail(&ptp->rxq, skb); + queue_work(ptp->workwq, &ptp->work); +} + +/* Transmit a PTP packet. This has to be transmitted by the MC + * itself, through an MCDI call. MCDI calls aren't permitted + * in the transmit path so defer the actual transmission to a suitable worker. + */ +int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + skb_queue_tail(&ptp->txq, skb); + + if ((udp_hdr(skb)->dest == htons(PTP_EVENT_PORT)) && + (skb->len <= MC_CMD_PTP_IN_TRANSMIT_PACKET_MAXNUM)) + efx_xmit_hwtstamp_pending(skb); + queue_work(ptp->workwq, &ptp->work); + + return NETDEV_TX_OK; +} + +static int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted, + unsigned int new_mode) +{ + if ((enable_wanted != efx->ptp_data->enabled) || + (enable_wanted && (efx->ptp_data->mode != new_mode))) { + int rc; + + if (enable_wanted) { + /* Change of mode requires disable */ + if (efx->ptp_data->enabled && + (efx->ptp_data->mode != new_mode)) { + efx->ptp_data->enabled = false; + rc = efx_ptp_stop(efx); + if (rc != 0) + return rc; + } + + /* Set new operating mode and establish + * baseline synchronisation, which must + * succeed. + */ + efx->ptp_data->mode = new_mode; + rc = efx_ptp_start(efx); + if (rc == 0) { + rc = efx_ptp_synchronize(efx, + PTP_SYNC_ATTEMPTS * 2); + if (rc != 0) + efx_ptp_stop(efx); + } + } else { + rc = efx_ptp_stop(efx); + } + + if (rc != 0) + return rc; + + efx->ptp_data->enabled = enable_wanted; + } + + return 0; +} + +static int efx_ptp_ts_init(struct efx_nic *efx, struct hwtstamp_config *init) +{ + bool enable_wanted = false; + unsigned int new_mode; + int rc; + + if (init->flags) + return -EINVAL; + + if ((init->tx_type != HWTSTAMP_TX_OFF) && + (init->tx_type != HWTSTAMP_TX_ON)) + return -ERANGE; + + new_mode = efx->ptp_data->mode; + /* Determine whether any PTP HW operations are required */ + switch (init->rx_filter) { + case HWTSTAMP_FILTER_NONE: + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + init->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + new_mode = MC_CMD_PTP_MODE_V1; + enable_wanted = true; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + /* Although these three are accepted only IPV4 packets will be + * timestamped + */ + init->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + new_mode = MC_CMD_PTP_MODE_V2; + enable_wanted = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + /* Non-IP + IPv6 timestamping not supported */ + return -ERANGE; + break; + default: + return -ERANGE; + } + + if (init->tx_type != HWTSTAMP_TX_OFF) + enable_wanted = true; + + rc = efx_ptp_change_mode(efx, enable_wanted, new_mode); + if (rc != 0) + return rc; + + efx->ptp_data->config = *init; + + return 0; +} + +int +efx_ptp_get_ts_info(struct net_device *net_dev, struct ethtool_ts_info *ts_info) +{ + struct efx_nic *efx = netdev_priv(net_dev); + struct efx_ptp_data *ptp = efx->ptp_data; + + if (!ptp) + return -EOPNOTSUPP; + + ts_info->so_timestamping = (SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE); + ts_info->phc_index = ptp_clock_index(ptp->phc_clock); + ts_info->tx_types = 1 << HWTSTAMP_TX_OFF | 1 << HWTSTAMP_TX_ON; + ts_info->rx_filters = (1 << HWTSTAMP_FILTER_NONE | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC | + 1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC | + 1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ); + return 0; +} + +int efx_ptp_ioctl(struct efx_nic *efx, struct ifreq *ifr, int cmd) +{ + struct hwtstamp_config config; + int rc; + + /* Not a PTP enabled port */ + if (!efx->ptp_data) + return -EOPNOTSUPP; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + rc = efx_ptp_ts_init(efx, &config); + if (rc != 0) + return rc; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) + ? -EFAULT : 0; +} + +static void ptp_event_failure(struct efx_nic *efx, int expected_frag_len) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + + netif_err(efx, hw, efx->net_dev, + "PTP unexpected event length: got %d expected %d\n", + ptp->evt_frag_idx, expected_frag_len); + ptp->reset_required = true; + queue_work(ptp->workwq, &ptp->work); +} + +/* Process a completed receive event. Put it on the event queue and + * start worker thread. This is required because event and their + * correspoding packets may come in either order. + */ +static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + struct efx_ptp_event_rx *evt = NULL; + + if (ptp->evt_frag_idx != 3) { + ptp_event_failure(efx, 3); + return; + } + + spin_lock_bh(&ptp->evt_lock); + if (!list_empty(&ptp->evt_free_list)) { + evt = list_first_entry(&ptp->evt_free_list, + struct efx_ptp_event_rx, link); + list_del(&evt->link); + + evt->seq0 = EFX_QWORD_FIELD(ptp->evt_frags[2], MCDI_EVENT_DATA); + evt->seq1 = (EFX_QWORD_FIELD(ptp->evt_frags[2], + MCDI_EVENT_SRC) | + (EFX_QWORD_FIELD(ptp->evt_frags[1], + MCDI_EVENT_SRC) << 8) | + (EFX_QWORD_FIELD(ptp->evt_frags[0], + MCDI_EVENT_SRC) << 16)); + evt->hwtimestamp = ktime_set( + EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA), + EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA)); + evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS); + list_add_tail(&evt->link, &ptp->evt_list); + + queue_work(ptp->workwq, &ptp->work); + } else { + netif_err(efx, rx_err, efx->net_dev, "No free PTP event"); + } + spin_unlock_bh(&ptp->evt_lock); +} + +static void ptp_event_fault(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + int code = EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA); + if (ptp->evt_frag_idx != 1) { + ptp_event_failure(efx, 1); + return; + } + + netif_err(efx, hw, efx->net_dev, "PTP error %d\n", code); +} + +static void ptp_event_pps(struct efx_nic *efx, struct efx_ptp_data *ptp) +{ + if (ptp->nic_ts_enabled) + queue_work(ptp->pps_workwq, &ptp->pps_work); +} + +void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) +{ + struct efx_ptp_data *ptp = efx->ptp_data; + int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE); + + if (!ptp->enabled) + return; + + if (ptp->evt_frag_idx == 0) { + ptp->evt_code = code; + } else if (ptp->evt_code != code) { + netif_err(efx, hw, efx->net_dev, + "PTP out of sequence event %d\n", code); + ptp->evt_frag_idx = 0; + } + + ptp->evt_frags[ptp->evt_frag_idx++] = *ev; + if (!MCDI_EVENT_FIELD(*ev, CONT)) { + /* Process resulting event */ + switch (code) { + case MCDI_EVENT_CODE_PTP_RX: + ptp_event_rx(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_FAULT: + ptp_event_fault(efx, ptp); + break; + case MCDI_EVENT_CODE_PTP_PPS: + ptp_event_pps(efx, ptp); + break; + default: + netif_err(efx, hw, efx->net_dev, + "PTP unknown event %d\n", code); + break; + } + ptp->evt_frag_idx = 0; + } else if (MAX_EVENT_FRAGS == ptp->evt_frag_idx) { + netif_err(efx, hw, efx->net_dev, + "PTP too many event fragments\n"); + ptp->evt_frag_idx = 0; + } +} + +static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + u8 inadj[MC_CMD_PTP_IN_ADJUST_LEN]; + s64 adjustment_ns; + int rc; + + if (delta > MAX_PPB) + delta = MAX_PPB; + else if (delta < -MAX_PPB) + delta = -MAX_PPB; + + /* Convert ppb to fixed point ns. */ + adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >> + (PPB_EXTRA_BITS + MAX_PPB_BITS)); + + MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_LO, (u32)adjustment_ns); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_FREQ_HI, + (u32)(adjustment_ns >> 32)); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_SECONDS, 0); + MCDI_SET_DWORD(inadj, PTP_IN_ADJUST_NANOSECONDS, 0); + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inadj, sizeof(inadj), + NULL, 0, NULL); + if (rc != 0) + return rc; + + ptp_data->current_adjfreq = delta; + return 0; +} + +static int efx_phc_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + struct timespec delta_ts = ns_to_timespec(delta); + u8 inbuf[MC_CMD_PTP_IN_ADJUST_LEN]; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_LO, 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_FREQ_HI, 0); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_SECONDS, (u32)delta_ts.tv_sec); + MCDI_SET_DWORD(inbuf, PTP_IN_ADJUST_NANOSECONDS, (u32)delta_ts.tv_nsec); + return efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + +static int efx_phc_gettime(struct ptp_clock_info *ptp, struct timespec *ts) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + struct efx_nic *efx = ptp_data->channel->efx; + u8 inbuf[MC_CMD_PTP_IN_READ_NIC_TIME_LEN]; + u8 outbuf[MC_CMD_PTP_OUT_READ_NIC_TIME_LEN]; + int rc; + + MCDI_SET_DWORD(inbuf, PTP_IN_OP, MC_CMD_PTP_OP_READ_NIC_TIME); + + rc = efx_mcdi_rpc(efx, MC_CMD_PTP, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), NULL); + if (rc != 0) + return rc; + + ts->tv_sec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_SECONDS); + ts->tv_nsec = MCDI_DWORD(outbuf, PTP_OUT_READ_NIC_TIME_NANOSECONDS); + return 0; +} + +static int efx_phc_settime(struct ptp_clock_info *ptp, + const struct timespec *e_ts) +{ + /* Get the current NIC time, efx_phc_gettime. + * Subtract from the desired time to get the offset + * call efx_phc_adjtime with the offset + */ + int rc; + struct timespec time_now; + struct timespec delta; + + rc = efx_phc_gettime(ptp, &time_now); + if (rc != 0) + return rc; + + delta = timespec_sub(*e_ts, time_now); + + efx_phc_adjtime(ptp, timespec_to_ns(&delta)); + if (rc != 0) + return rc; + + return 0; +} + +static int efx_phc_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *request, + int enable) +{ + struct efx_ptp_data *ptp_data = container_of(ptp, + struct efx_ptp_data, + phc_clock_info); + if (request->type != PTP_CLK_REQ_PPS) + return -EOPNOTSUPP; + + ptp_data->nic_ts_enabled = !!enable; + return 0; +} + +static const struct efx_channel_type efx_ptp_channel_type = { + .handle_no_channel = efx_ptp_handle_no_channel, + .pre_probe = efx_ptp_probe_channel, + .post_remove = efx_ptp_remove_channel, + .get_name = efx_ptp_get_channel_name, + /* no copy operation; there is no need to reallocate this channel */ + .receive_skb = efx_ptp_rx, + .keep_eventq = false, +}; + +void efx_ptp_probe(struct efx_nic *efx) +{ + /* Check whether PTP is implemented on this NIC. The DISABLE + * operation will succeed if and only if it is implemented. + */ + if (efx_ptp_disable(efx) == 0) + efx->extra_channel_type[EFX_EXTRA_CHANNEL_PTP] = + &efx_ptp_channel_type; +} diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 719319b89d7..9e0ad1b75c3 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -479,7 +479,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel, skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE); - skb_record_rx_queue(skb, channel->channel); + skb_record_rx_queue(skb, channel->rx_queue.core_index); gro_result = napi_gro_frags(napi); } else { @@ -571,8 +571,14 @@ static void efx_rx_deliver(struct efx_channel *channel, /* Set the SKB flags */ skb_checksum_none_assert(skb); + /* Record the rx_queue */ + skb_record_rx_queue(skb, channel->rx_queue.core_index); + /* Pass the packet up */ - netif_receive_skb(skb); + if (channel->type->receive_skb) + channel->type->receive_skb(channel, skb); + else + netif_receive_skb(skb); /* Update allocation strategy method */ channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB; @@ -608,13 +614,14 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf) * at the ethernet header */ skb->protocol = eth_type_trans(skb, efx->net_dev); - skb_record_rx_queue(skb, channel->channel); + skb_record_rx_queue(skb, channel->rx_queue.core_index); } if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM))) rx_buf->flags &= ~EFX_RX_PKT_CSUMMED; - if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED))) + if (likely(rx_buf->flags & (EFX_RX_BUF_PAGE | EFX_RX_PKT_CSUMMED)) && + !channel->type->receive_skb) efx_rx_packet_gro(channel, rx_buf, eh); else efx_rx_deliver(channel, rx_buf); @@ -624,6 +631,11 @@ void efx_rx_strategy(struct efx_channel *channel) { enum efx_rx_alloc_method method = rx_alloc_method; + if (channel->type->receive_skb) { + channel->rx_alloc_push_pages = false; + return; + } + /* Only makes sense to use page based allocation if GRO is enabled */ if (!(channel->efx->net_dev->features & NETIF_F_GRO)) { method = RX_ALLOC_METHOD_SKB; diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 96068d15b60..ce72ae4f399 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -614,7 +614,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests, { enum efx_loopback_mode mode; struct efx_loopback_state *state; - struct efx_channel *channel = efx_get_channel(efx, 0); + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); struct efx_tx_queue *tx_queue; int rc = 0; diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 6bafd216e55..84b41bf08a3 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -335,6 +335,7 @@ static int siena_probe_nic(struct efx_nic *efx) goto fail5; efx_sriov_probe(efx); + efx_ptp_probe(efx); return 0; diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 9cb3b84ecae..d49b53dc2a5 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -21,6 +21,9 @@ /* Number of longs required to track all the VIs in a VF */ #define VI_MASK_LENGTH BITS_TO_LONGS(1 << EFX_VI_SCALE_MAX) +/* Maximum number of RX queues supported */ +#define VF_MAX_RX_QUEUES 63 + /** * enum efx_vf_tx_filter_mode - TX MAC filtering behaviour * @VF_TX_FILTER_OFF: Disabled @@ -578,6 +581,7 @@ static int efx_vfdi_init_rxq(struct efx_vf *vf) efx_oword_t reg; if (bad_vf_index(efx, vf_evq) || bad_vf_index(efx, vf_rxq) || + vf_rxq >= VF_MAX_RX_QUEUES || bad_buf_count(buf_count, EFX_MAX_DMAQ_SIZE)) { if (net_ratelimit()) netif_err(efx, hw, efx->net_dev, @@ -683,6 +687,9 @@ static int efx_vfdi_fini_all_queues(struct efx_vf *vf) __le32 *rxqs; int rc; + BUILD_BUG_ON(VF_MAX_RX_QUEUES > + MC_CMD_FLUSH_RX_QUEUES_IN_QID_OFST_MAXNUM); + rxqs = kmalloc(count * sizeof(*rxqs), GFP_KERNEL); if (rxqs == NULL) return VFDI_RC_ENOMEM; @@ -1028,6 +1035,7 @@ efx_sriov_get_channel_name(struct efx_channel *channel, char *buf, size_t len) static const struct efx_channel_type efx_sriov_channel_type = { .handle_no_channel = efx_sriov_handle_no_channel, .pre_probe = efx_sriov_probe_channel, + .post_remove = efx_channel_dummy_op_void, .get_name = efx_sriov_get_channel_name, /* no copy operation; channel must not be reallocated */ .keep_eventq = true, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 18713436b44..5e090e54298 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -22,14 +22,6 @@ #include "nic.h" #include "workarounds.h" -/* - * TX descriptor ring full threshold - * - * The tx_queue descriptor ring fill-level must fall below this value - * before we restart the netif queue - */ -#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u) - static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, @@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct device *dma_dev = &tx_queue->efx->pci_dev->dev; dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - buffer->unmap_len); - if (buffer->unmap_single) + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); else dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); buffer->unmap_len = 0; - buffer->unmap_single = false; } - if (buffer->skb) { + if (buffer->flags & EFX_TX_BUF_SKB) { (*pkts_compl)++; (*bytes_compl) += buffer->skb->len; dev_kfree_skb_any((struct sk_buff *) buffer->skb); - buffer->skb = NULL; netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_HEAP) { + kfree(buffer->heap_buf); } -} -/** - * struct efx_tso_header - a DMA mapped buffer for packet headers - * @next: Linked list of free ones. - * The list is protected by the TX queue lock. - * @dma_unmap_len: Length to unmap for an oversize buffer, or 0. - * @dma_addr: The DMA address of the header below. - * - * This controls the memory used for a TSO header. Use TSOH_DATA() - * to find the packet header data. Use TSOH_SIZE() to calculate the - * total size required for a given packet header length. TSO headers - * in the free list are exactly %TSOH_STD_SIZE bytes in size. - */ -struct efx_tso_header { - union { - struct efx_tso_header *next; - size_t unmap_len; - }; - dma_addr_t dma_addr; -}; + buffer->len = 0; + buffer->flags = 0; +} static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb); -static void efx_fini_tso(struct efx_tx_queue *tx_queue); -static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh); - -static void efx_tsoh_free(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer) -{ - if (buffer->tsoh) { - if (likely(!buffer->tsoh->unmap_len)) { - buffer->tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = buffer->tsoh; - } else { - efx_tsoh_heap_free(tx_queue, buffer->tsoh); - } - buffer->tsoh = NULL; - } -} - static inline unsigned efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) @@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) return max_descs; } +/* Get partner of a TX queue, seen as part of the same net core queue */ +static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + return tx_queue - EFX_TXQ_TYPE_OFFLOAD; + else + return tx_queue + EFX_TXQ_TYPE_OFFLOAD; +} + +static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) +{ + /* We need to consider both queues that the net core sees as one */ + struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); + struct efx_nic *efx = txq1->efx; + unsigned int fill_level; + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + if (likely(fill_level < efx->txq_stop_thresh)) + return; + + /* We used the stale old_read_count above, which gives us a + * pessimistic estimate of the fill level (which may even + * validly be >= efx->txq_entries). Now try again using + * read_count (more likely to be a cache miss). + * + * If we read read_count and then conditionally stop the + * queue, it is possible for the completion path to race with + * us and complete all outstanding descriptors in the middle, + * after which there will be no more completions to wake it. + * Therefore we stop the queue first, then read read_count + * (with a memory barrier to ensure the ordering), then + * restart the queue if the fill level turns out to be low + * enough. + */ + netif_tx_stop_queue(txq1->core_txq); + smp_mb(); + txq1->old_read_count = ACCESS_ONCE(txq1->read_count); + txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries); + if (likely(fill_level < efx->txq_stop_thresh)) { + smp_mb(); + if (likely(!efx->loopback_selftest)) + netif_tx_start_queue(txq1->core_txq); + } +} + /* * Add a socket buffer to a TX queue * @@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) * This function is split out from efx_hard_start_xmit to allow the * loopback test to direct packets via specific TX queues. * - * Returns NETDEV_TX_OK or NETDEV_TX_BUSY + * Returns NETDEV_TX_OK. * You must hold netif_tx_lock() to call this function. */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) @@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; - unsigned int len, unmap_len = 0, fill_level, insert_ptr; + unsigned int len, unmap_len = 0, insert_ptr; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; - bool unmap_single; - int q_space, i = 0; - netdev_tx_t rc = NETDEV_TX_OK; + unsigned short dma_flags; + int i = 0; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return NETDEV_TX_OK; } - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - q_space = efx->txq_entries - 1 - fill_level; - /* Map for DMA. Use dma_map_single rather than dma_map_page * since this is more efficient on machines with sparse * memory. */ - unmap_single = true; + dma_flags = EFX_TX_BUF_MAP_SINGLE; dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); /* Process all fragments */ @@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Add to TX queue, splitting across DMA boundaries */ do { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have - * happened since the xmit path last - * checked. Update the xmit path's - * copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the - * change of queue state from the access - * of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - rc = NETDEV_TX_BUSY; - goto unwind; - } - smp_mb(); - if (likely(!efx->loopback_selftest)) - netif_tx_start_queue( - tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->tsoh); - EFX_BUG_ON_PARANOID(buffer->skb); + EFX_BUG_ON_PARANOID(buffer->flags); EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(!buffer->continuation); EFX_BUG_ON_PARANOID(buffer->unmap_len); dma_len = efx_max_tx_len(efx, dma_addr); @@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Fill out per descriptor fields */ buffer->len = dma_len; buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; len -= dma_len; dma_addr += dma_len; ++tx_queue->insert_count; } while (len); /* Transfer ownership of the unmapping to the final buffer */ - buffer->unmap_single = unmap_single; + buffer->flags = EFX_TX_BUF_CONT | dma_flags; buffer->unmap_len = unmap_len; unmap_len = 0; @@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) len = skb_frag_size(fragment); i++; /* Map for DMA */ - unmap_single = false; + dma_flags = 0; dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, DMA_TO_DEVICE); } /* Transfer ownership of the skb to the final buffer */ buffer->skb = skb; - buffer->continuation = false; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; netdev_tx_sent_queue(tx_queue->core_txq, skb->len); /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + return NETDEV_TX_OK; dma_err: @@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Mark the packet as transmitted, and free the SKB ourselves */ dev_kfree_skb_any(skb); - unwind: /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { unsigned int pkts_compl = 0, bytes_compl = 0; @@ -297,12 +273,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->len = 0; } /* Free the fragment we were mid-way through pushing */ if (unmap_len) { - if (unmap_single) + if (dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, unmap_len, DMA_TO_DEVICE); else @@ -310,7 +285,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) DMA_TO_DEVICE); } - return rc; + return NETDEV_TX_OK; } /* Remove packets from the TX queue @@ -340,8 +315,6 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, } efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; read_ptr = tx_queue->read_count & tx_queue->ptr_mask; @@ -366,6 +339,12 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, EFX_WARN_ON_PARANOID(!netif_device_present(net_dev)); + /* PTP "event" packet */ + if (unlikely(efx_xmit_with_hwtstamp(skb)) && + unlikely(efx_ptp_is_ptp_tx(efx, skb))) { + return efx_ptp_tx(efx, skb); + } + index = skb_get_queue_mapping(skb); type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0; if (index >= efx->n_tx_channels) { @@ -450,6 +429,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned fill_level; struct efx_nic *efx = tx_queue->efx; + struct efx_tx_queue *txq2; unsigned int pkts_compl = 0, bytes_compl = 0; EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask); @@ -457,15 +437,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl); netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl); - /* See if we need to restart the netif queue. This barrier - * separates the update of read_count from the test of the - * queue state. */ + /* See if we need to restart the netif queue. This memory + * barrier ensures that we write read_count (inside + * efx_dequeue_buffers()) before reading the queue status. + */ smp_mb(); if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) && likely(efx->port_enabled) && likely(netif_device_present(efx->net_dev))) { - fill_level = tx_queue->insert_count - tx_queue->read_count; - if (fill_level < EFX_TXQ_THRESHOLD(efx)) + txq2 = efx_tx_queue_partner(tx_queue); + fill_level = max(tx_queue->insert_count - tx_queue->read_count, + txq2->insert_count - txq2->read_count); + if (fill_level <= efx->txq_wake_thresh) netif_tx_wake_queue(tx_queue->core_txq); } @@ -480,11 +463,26 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } } +/* Size of page-based TSO header buffers. Larger blocks must be + * allocated from the heap. + */ +#define TSOH_STD_SIZE 128 +#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE) + +/* At most half the descriptors in the queue at any time will refer to + * a TSO header buffer, since they must always be followed by a + * payload descriptor referring to an skb. + */ +static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue) +{ + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE); +} + int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) { struct efx_nic *efx = tx_queue->efx; unsigned int entries; - int i, rc; + int rc; /* Create the smallest power-of-two aligned ring */ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE); @@ -500,17 +498,28 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) GFP_KERNEL); if (!tx_queue->buffer) return -ENOMEM; - for (i = 0; i <= tx_queue->ptr_mask; ++i) - tx_queue->buffer[i].continuation = true; + + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) { + tx_queue->tsoh_page = + kcalloc(efx_tsoh_page_count(tx_queue), + sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL); + if (!tx_queue->tsoh_page) { + rc = -ENOMEM; + goto fail1; + } + } /* Allocate hardware ring */ rc = efx_nic_probe_tx(tx_queue); if (rc) - goto fail; + goto fail2; return 0; - fail: +fail2: + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; +fail1: kfree(tx_queue->buffer); tx_queue->buffer = NULL; return rc; @@ -546,8 +555,6 @@ void efx_release_tx_buffers(struct efx_tx_queue *tx_queue) unsigned int pkts_compl = 0, bytes_compl = 0; buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask]; efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - buffer->continuation = true; - buffer->len = 0; ++tx_queue->read_count; } @@ -568,13 +575,12 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) efx_nic_fini_tx(tx_queue); efx_release_tx_buffers(tx_queue); - - /* Free up TSO header cache */ - efx_fini_tso(tx_queue); } void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) { + int i; + if (!tx_queue->buffer) return; @@ -582,6 +588,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) "destroying TX queue %d\n", tx_queue->queue); efx_nic_remove_tx(tx_queue); + if (tx_queue->tsoh_page) { + for (i = 0; i < efx_tsoh_page_count(tx_queue); i++) + efx_nic_free_buffer(tx_queue->efx, + &tx_queue->tsoh_page[i]); + kfree(tx_queue->tsoh_page); + tx_queue->tsoh_page = NULL; + } + kfree(tx_queue->buffer); tx_queue->buffer = NULL; } @@ -604,22 +618,7 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) #define TSOH_OFFSET NET_IP_ALIGN #endif -#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET) - -/* Total size of struct efx_tso_header, buffer and padding */ -#define TSOH_SIZE(hdr_len) \ - (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len) - -/* Size of blocks on free list. Larger blocks must be allocated from - * the heap. - */ -#define TSOH_STD_SIZE 128 - #define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) -#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data) -#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data) -#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data) -#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data) /** * struct tso_state - TSO state for an SKB @@ -631,10 +630,12 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) * @in_len: Remaining length in current SKB fragment * @unmap_len: Length of SKB fragment * @unmap_addr: DMA address of SKB fragment - * @unmap_single: DMA single vs page mapping flag + * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 * @protocol: Network protocol (after any VLAN header) + * @ip_off: Offset of IP header + * @tcp_off: Offset of TCP header * @header_len: Number of bytes of header - * @full_packet_size: Number of bytes to put in each outgoing segment + * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload * * The state used during segmentation. It is put into this data structure * just to make it easy to pass into inline functions. @@ -651,11 +652,13 @@ struct tso_state { unsigned in_len; unsigned unmap_len; dma_addr_t unmap_addr; - bool unmap_single; + unsigned short dma_flags; __be16 protocol; + unsigned int ip_off; + unsigned int tcp_off; unsigned header_len; - int full_packet_size; + unsigned int ip_base_len; }; @@ -687,91 +690,43 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb) return protocol; } - -/* - * Allocate a page worth of efx_tso_header structures, and string them - * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM. - */ -static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue) +static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, unsigned int len) { - struct device *dma_dev = &tx_queue->efx->pci_dev->dev; - struct efx_tso_header *tsoh; - dma_addr_t dma_addr; - u8 *base_kva, *kva; + u8 *result; - base_kva = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr, GFP_ATOMIC); - if (base_kva == NULL) { - netif_err(tx_queue->efx, tx_err, tx_queue->efx->net_dev, - "Unable to allocate page for TSO headers\n"); - return -ENOMEM; - } - - /* dma_alloc_coherent() allocates pages. */ - EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u)); - - for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) { - tsoh = (struct efx_tso_header *)kva; - tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva); - tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh; - } - - return 0; -} - - -/* Free up a TSO header, and all others in the same page. */ -static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, - struct device *dma_dev) -{ - struct efx_tso_header **p; - unsigned long base_kva; - dma_addr_t base_dma; - - base_kva = (unsigned long)tsoh & PAGE_MASK; - base_dma = tsoh->dma_addr & PAGE_MASK; - - p = &tx_queue->tso_headers_free; - while (*p != NULL) { - if (((unsigned long)*p & PAGE_MASK) == base_kva) - *p = (*p)->next; - else - p = &(*p)->next; - } + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); - dma_free_coherent(dma_dev, PAGE_SIZE, (void *)base_kva, base_dma); -} + if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) { + unsigned index = + (tx_queue->insert_count & tx_queue->ptr_mask) / 2; + struct efx_buffer *page_buf = + &tx_queue->tsoh_page[index / TSOH_PER_PAGE]; + unsigned offset = + TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET; + + if (unlikely(!page_buf->addr) && + efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE)) + return NULL; + + result = (u8 *)page_buf->addr + offset; + buffer->dma_addr = page_buf->dma_addr + offset; + buffer->flags = EFX_TX_BUF_CONT; + } else { + tx_queue->tso_long_headers++; -static struct efx_tso_header * -efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len) -{ - struct efx_tso_header *tsoh; - - tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA); - if (unlikely(!tsoh)) - return NULL; - - tsoh->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, - TSOH_BUFFER(tsoh), header_len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr))) { - kfree(tsoh); - return NULL; + buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC); + if (unlikely(!buffer->heap_buf)) + return NULL; + result = (u8 *)buffer->heap_buf + TSOH_OFFSET; + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; } - tsoh->unmap_len = header_len; - return tsoh; -} + buffer->len = len; -static void -efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) -{ - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - tsoh->dma_addr, tsoh->unmap_len, - DMA_TO_DEVICE); - kfree(tsoh); + return result; } /** @@ -781,47 +736,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh) * @len: Length of fragment * @final_buffer: The final buffer inserted into the queue * - * Push descriptors onto the TX queue. Return 0 on success or 1 if - * @tx_queue full. + * Push descriptors onto the TX queue. */ -static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, unsigned len, - struct efx_tx_buffer **final_buffer) +static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned len, + struct efx_tx_buffer **final_buffer) { struct efx_tx_buffer *buffer; struct efx_nic *efx = tx_queue->efx; - unsigned dma_len, fill_level, insert_ptr; - int q_space; + unsigned dma_len, insert_ptr; EFX_BUG_ON_PARANOID(len <= 0); - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - /* -1 as there is no way to represent all descriptors used */ - q_space = efx->txq_entries - 1 - fill_level; - while (1) { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have happened - * since the xmit path last checked. Update - * the xmit path's copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the change of - * queue state from the access of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - *final_buffer = NULL; - return 1; - } - smp_mb(); - netif_tx_start_queue(tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; ++tx_queue->insert_count; @@ -830,12 +757,9 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, tx_queue->read_count >= efx->txq_entries); - efx_tsoh_free(tx_queue, buffer); EFX_BUG_ON_PARANOID(buffer->len); EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); + EFX_BUG_ON_PARANOID(buffer->flags); buffer->dma_addr = dma_addr; @@ -845,7 +769,8 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, if (dma_len >= len) break; - buffer->len = dma_len; /* Don't set the other members */ + buffer->len = dma_len; + buffer->flags = EFX_TX_BUF_CONT; dma_addr += dma_len; len -= dma_len; } @@ -853,7 +778,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, EFX_BUG_ON_PARANOID(!len); buffer->len = len; *final_buffer = buffer; - return 0; } @@ -864,54 +788,42 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue, * a single fragment, and we know it doesn't cross a page boundary. It * also allows us to not worry about end-of-packet etc. */ -static void efx_tso_put_header(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh, unsigned len) +static int efx_tso_put_header(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, u8 *header) { - struct efx_tx_buffer *buffer; - - buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - EFX_BUG_ON_PARANOID(buffer->skb); - EFX_BUG_ON_PARANOID(!buffer->continuation); - EFX_BUG_ON_PARANOID(buffer->tsoh); - buffer->len = len; - buffer->dma_addr = tsoh->dma_addr; - buffer->tsoh = tsoh; + if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { + buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, + header, buffer->len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, + buffer->dma_addr))) { + kfree(buffer->heap_buf); + buffer->len = 0; + buffer->flags = 0; + return -ENOMEM; + } + buffer->unmap_len = buffer->len; + buffer->flags |= EFX_TX_BUF_MAP_SINGLE; + } ++tx_queue->insert_count; + return 0; } -/* Remove descriptors put into a tx_queue. */ +/* Remove buffers put into a tx_queue. None of the buffers must have + * an skb attached. + */ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) { struct efx_tx_buffer *buffer; - dma_addr_t unmap_addr; /* Work backwards until we hit the original insert pointer value */ while (tx_queue->insert_count != tx_queue->write_count) { --tx_queue->insert_count; buffer = &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->skb); - if (buffer->unmap_len) { - unmap_addr = (buffer->dma_addr + buffer->len - - buffer->unmap_len); - if (buffer->unmap_single) - dma_unmap_single(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(&tx_queue->efx->pci_dev->dev, - unmap_addr, buffer->unmap_len, - DMA_TO_DEVICE); - buffer->unmap_len = 0; - } - buffer->len = 0; - buffer->continuation = true; + efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); } } @@ -919,17 +831,16 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) /* Parse the SKB header and initialise state. */ static void tso_start(struct tso_state *st, const struct sk_buff *skb) { - /* All ethernet/IP/TCP headers combined size is TCP header size - * plus offset of TCP header relative to start of packet. - */ - st->header_len = ((tcp_hdr(skb)->doff << 2u) - + PTR_DIFF(tcp_hdr(skb), skb->data)); - st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size; - - if (st->protocol == htons(ETH_P_IP)) + st->ip_off = skb_network_header(skb) - skb->data; + st->tcp_off = skb_transport_header(skb) - skb->data; + st->header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + if (st->protocol == htons(ETH_P_IP)) { + st->ip_base_len = st->header_len - st->ip_off; st->ipv4_id = ntohs(ip_hdr(skb)->id); - else + } else { + st->ip_base_len = st->header_len - st->tcp_off; st->ipv4_id = 0; + } st->seqnum = ntohl(tcp_hdr(skb)->seq); EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); @@ -938,7 +849,7 @@ static void tso_start(struct tso_state *st, const struct sk_buff *skb) st->out_len = skb->len - st->header_len; st->unmap_len = 0; - st->unmap_single = false; + st->dma_flags = 0; } static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, @@ -947,7 +858,7 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = false; + st->dma_flags = 0; st->unmap_len = skb_frag_size(frag); st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; @@ -965,7 +876,7 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = dma_map_single(&efx->pci_dev->dev, skb->data + hl, len, DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->unmap_single = true; + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; st->unmap_len = len; st->in_len = len; st->dma_addr = st->unmap_addr; @@ -982,20 +893,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx, * @st: TSO state * * Form descriptors for the current fragment, until we reach the end - * of fragment or end-of-packet. Return 0 on success, 1 if not enough - * space in @tx_queue. + * of fragment or end-of-packet. */ -static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, - const struct sk_buff *skb, - struct tso_state *st) +static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + struct tso_state *st) { struct efx_tx_buffer *buffer; - int n, end_of_packet, rc; + int n; if (st->in_len == 0) - return 0; + return; if (st->packet_space == 0) - return 0; + return; EFX_BUG_ON_PARANOID(st->in_len <= 0); EFX_BUG_ON_PARANOID(st->packet_space <= 0); @@ -1006,25 +916,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, st->out_len -= n; st->in_len -= n; - rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - if (likely(rc == 0)) { - if (st->out_len == 0) - /* Transfer ownership of the skb */ - buffer->skb = skb; + efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - end_of_packet = st->out_len == 0 || st->packet_space == 0; - buffer->continuation = !end_of_packet; + if (st->out_len == 0) { + /* Transfer ownership of the skb */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + } else if (st->packet_space != 0) { + buffer->flags = EFX_TX_BUF_CONT; + } - if (st->in_len == 0) { - /* Transfer ownership of the DMA mapping */ - buffer->unmap_len = st->unmap_len; - buffer->unmap_single = st->unmap_single; - st->unmap_len = 0; - } + if (st->in_len == 0) { + /* Transfer ownership of the DMA mapping */ + buffer->unmap_len = st->unmap_len; + buffer->flags |= st->dma_flags; + st->unmap_len = 0; } st->dma_addr += n; - return rc; } @@ -1035,36 +944,25 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, * @st: TSO state * * Generate a new header and prepare for the new packet. Return 0 on - * success, or -1 if failed to alloc header. + * success, or -%ENOMEM if failed to alloc header. */ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, struct tso_state *st) { - struct efx_tso_header *tsoh; + struct efx_tx_buffer *buffer = + &tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask]; struct tcphdr *tsoh_th; unsigned ip_length; u8 *header; + int rc; - /* Allocate a DMA-mapped header buffer. */ - if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) { - if (tx_queue->tso_headers_free == NULL) { - if (efx_tsoh_block_alloc(tx_queue)) - return -1; - } - EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free); - tsoh = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = tsoh->next; - tsoh->unmap_len = 0; - } else { - tx_queue->tso_long_headers++; - tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len); - if (unlikely(!tsoh)) - return -1; - } + /* Allocate and insert a DMA-mapped header buffer. */ + header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); + if (!header) + return -ENOMEM; - header = TSOH_BUFFER(tsoh); - tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb)); + tsoh_th = (struct tcphdr *)(header + st->tcp_off); /* Copy and update the headers. */ memcpy(header, skb->data, st->header_len); @@ -1073,19 +971,19 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->seqnum += skb_shinfo(skb)->gso_size; if (st->out_len > skb_shinfo(skb)->gso_size) { /* This packet will not finish the TSO burst. */ - ip_length = st->full_packet_size - ETH_HDR_LEN(skb); + st->packet_space = skb_shinfo(skb)->gso_size; tsoh_th->fin = 0; tsoh_th->psh = 0; } else { /* This packet will be the last in the TSO burst. */ - ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len; + st->packet_space = st->out_len; tsoh_th->fin = tcp_hdr(skb)->fin; tsoh_th->psh = tcp_hdr(skb)->psh; } + ip_length = st->ip_base_len + st->packet_space; if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = - (struct iphdr *)(header + SKB_IPV4_OFF(skb)); + struct iphdr *tsoh_iph = (struct iphdr *)(header + st->ip_off); tsoh_iph->tot_len = htons(ip_length); @@ -1094,16 +992,16 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, st->ipv4_id++; } else { struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + SKB_IPV6_OFF(skb)); + (struct ipv6hdr *)(header + st->ip_off); - tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph)); + tsoh_iph->payload_len = htons(ip_length); } - st->packet_space = skb_shinfo(skb)->gso_size; - ++tx_queue->tso_packets; + rc = efx_tso_put_header(tx_queue, buffer, header); + if (unlikely(rc)) + return rc; - /* Form a descriptor for this header. */ - efx_tso_put_header(tx_queue, tsoh, st->header_len); + ++tx_queue->tso_packets; return 0; } @@ -1118,13 +1016,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, * * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if * @skb was not enqueued. In all cases @skb is consumed. Return - * %NETDEV_TX_OK or %NETDEV_TX_BUSY. + * %NETDEV_TX_OK. */ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; - int frag_i, rc, rc2 = NETDEV_TX_OK; + int frag_i, rc; struct tso_state state; /* Find the packet protocol and sanity-check it */ @@ -1156,11 +1054,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, goto mem_err; while (1) { - rc = tso_fill_packet_with_fragment(tx_queue, skb, &state); - if (unlikely(rc)) { - rc2 = NETDEV_TX_BUSY; - goto unwind; - } + tso_fill_packet_with_fragment(tx_queue, skb, &state); /* Move onto the next fragment? */ if (state.in_len == 0) { @@ -1184,6 +1078,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + tx_queue->tso_bursts++; return NETDEV_TX_OK; @@ -1192,10 +1088,9 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, "Out of memory for TSO headers, or DMA mapping error\n"); dev_kfree_skb_any(skb); - unwind: /* Free the DMA mapping we were in the process of writing out */ if (state.unmap_len) { - if (state.unmap_single) + if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, state.unmap_len, DMA_TO_DEVICE); else @@ -1204,25 +1099,5 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, } efx_enqueue_unwind(tx_queue); - return rc2; -} - - -/* - * Free up all TSO datastructures associated with tx_queue. This - * routine should be called only once the tx_queue is both empty and - * will no longer be used. - */ -static void efx_fini_tso(struct efx_tx_queue *tx_queue) -{ - unsigned i; - - if (tx_queue->buffer) { - for (i = 0; i <= tx_queue->ptr_mask; ++i) - efx_tsoh_free(tx_queue, &tx_queue->buffer[i]); - } - - while (tx_queue->tso_headers_free != NULL) - efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free, - &tx_queue->efx->pci_dev->dev); + return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c index b5ba3084c7f..3e5519a0acc 100644 --- a/drivers/net/ethernet/sgi/ioc3-eth.c +++ b/drivers/net/ethernet/sgi/ioc3-eth.c @@ -1147,15 +1147,17 @@ static void __devinit ioc3_8250_register(struct ioc3_uartregs __iomem *uart) { #define COSMISC_CONSTANT 6 - struct uart_port port = { - .irq = 0, - .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, - .iotype = UPIO_MEM, - .regshift = 0, - .uartclk = (22000000 << 1) / COSMISC_CONSTANT, - - .membase = (unsigned char __iomem *) uart, - .mapbase = (unsigned long) uart, + struct uart_8250_port port = { + .port = { + .irq = 0, + .flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF, + .iotype = UPIO_MEM, + .regshift = 0, + .uartclk = (22000000 << 1) / COSMISC_CONSTANT, + + .membase = (unsigned char __iomem *) uart, + .mapbase = (unsigned long) uart, + } }; unsigned char lcr; @@ -1164,7 +1166,7 @@ static void __devinit ioc3_8250_register(struct ioc3_uartregs __iomem *uart) uart->iu_scr = COSMISC_CONSTANT, uart->iu_lcr = lcr; uart->iu_lcr; - serial8250_register_port(&port); + serial8250_register_8250_port(&port); } static void __devinit ioc3_serial_probe(struct pci_dev *pdev, struct ioc3 *ioc3) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index ade10823204..0376a5e6b2b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -177,7 +177,7 @@ int stmmac_mdio_register(struct net_device *ndev) new_bus->write = &stmmac_mdio_write; new_bus->reset = &stmmac_mdio_reset; snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x", - new_bus->name, mdio_bus_data->bus_id); + new_bus->name, priv->plat->bus_id); new_bus->priv = ndev; new_bus->irq = irqlist; new_bus->phy_mask = mdio_bus_data->phy_mask; @@ -213,12 +213,10 @@ int stmmac_mdio_register(struct net_device *ndev) * and no PHY number was provided to the MAC, * use the one probed here. */ - if ((priv->plat->bus_id == mdio_bus_data->bus_id) && - (priv->plat->phy_addr == -1)) + if (priv->plat->phy_addr == -1) priv->plat->phy_addr = addr; - act = (priv->plat->bus_id == mdio_bus_data->bus_id) && - (priv->plat->phy_addr == addr); + act = (priv->plat->phy_addr == addr); switch (phydev->irq) { case PHY_POLL: irq_str = "POLL"; @@ -258,6 +256,9 @@ int stmmac_mdio_unregister(struct net_device *ndev) { struct stmmac_priv *priv = netdev_priv(ndev); + if (!priv->mii) + return 0; + mdiobus_unregister(priv->mii); priv->mii->priv = NULL; mdiobus_free(priv->mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 13afb8edfad..1f069b0f6af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -40,7 +40,6 @@ static void stmmac_default_data(void) plat_dat.has_gmac = 1; plat_dat.force_sf_dma_mode = 1; - mdio_data.bus_id = 1; mdio_data.phy_reset = NULL; mdio_data.phy_mask = 0; plat_dat.mdio_bus_data = &mdio_data; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index b93245c1199..ed112b55ae7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -78,6 +78,7 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) { int ret = 0; struct resource *res; + struct device *dev = &pdev->dev; void __iomem *addr = NULL; struct stmmac_priv *priv = NULL; struct plat_stmmacenet_data *plat_dat = NULL; @@ -87,18 +88,10 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) if (!res) return -ENODEV; - if (!request_mem_region(res->start, resource_size(res), pdev->name)) { - pr_err("%s: ERROR: memory allocation failed" - "cannot get the I/O addr 0x%x\n", - __func__, (unsigned int)res->start); - return -EBUSY; - } - - addr = ioremap(res->start, resource_size(res)); + addr = devm_request_and_ioremap(dev, res); if (!addr) { pr_err("%s: ERROR: memory mapping failed", __func__); - ret = -ENOMEM; - goto out_release_region; + return -ENOMEM; } if (pdev->dev.of_node) { @@ -107,14 +100,13 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) GFP_KERNEL); if (!plat_dat) { pr_err("%s: ERROR: no memory", __func__); - ret = -ENOMEM; - goto out_unmap; + return -ENOMEM; } ret = stmmac_probe_config_dt(pdev, plat_dat, &mac); if (ret) { pr_err("%s: main dt probe failed", __func__); - goto out_unmap; + return ret; } } else { plat_dat = pdev->dev.platform_data; @@ -124,13 +116,13 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) if (plat_dat->init) { ret = plat_dat->init(pdev); if (unlikely(ret)) - goto out_unmap; + return ret; } priv = stmmac_dvr_probe(&(pdev->dev), plat_dat, addr); if (!priv) { pr_err("%s: main driver probe failed", __func__); - goto out_unmap; + return -ENODEV; } /* Get MAC address if available (DT) */ @@ -142,8 +134,7 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) if (priv->dev->irq == -ENXIO) { pr_err("%s: ERROR: MAC IRQ configuration " "information not found\n", __func__); - ret = -ENXIO; - goto out_unmap; + return -ENXIO; } /* @@ -165,15 +156,6 @@ static int __devinit stmmac_pltfr_probe(struct platform_device *pdev) pr_debug("STMMAC platform driver registration completed"); return 0; - -out_unmap: - iounmap(addr); - platform_set_drvdata(pdev, NULL); - -out_release_region: - release_mem_region(res->start, resource_size(res)); - - return ret; } /** @@ -186,7 +168,6 @@ static int stmmac_pltfr_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); - struct resource *res; int ret = stmmac_dvr_remove(ndev); if (priv->plat->exit) @@ -194,10 +175,6 @@ static int stmmac_pltfr_remove(struct platform_device *pdev) platform_set_drvdata(pdev, NULL); - iounmap((void __force __iomem *)priv->ioaddr); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - return ret; } diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index ce4df61b4b5..c8251be104d 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -3890,7 +3890,7 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu) schedule_work(&cp->reset_task); #endif - flush_work_sync(&cp->reset_task); + flush_work(&cp->reset_task); return 0; } diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index c2a0fe39326..8419bf385e0 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9762,9 +9762,8 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, union niu_parent_id parent_id; struct net_device *dev; struct niu *np; - int err, pos; + int err; u64 dma_mask; - u16 val16; niu_driver_version(); @@ -9787,8 +9786,7 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, goto err_out_disable_pdev; } - pos = pci_pcie_cap(pdev); - if (pos <= 0) { + if (!pci_is_pcie(pdev)) { dev_err(&pdev->dev, "Cannot find PCI Express capability, aborting\n"); goto err_out_free_res; } @@ -9813,14 +9811,11 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, goto err_out_free_dev; } - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &val16); - val16 &= ~PCI_EXP_DEVCTL_NOSNOOP_EN; - val16 |= (PCI_EXP_DEVCTL_CERE | - PCI_EXP_DEVCTL_NFERE | - PCI_EXP_DEVCTL_FERE | - PCI_EXP_DEVCTL_URRE | - PCI_EXP_DEVCTL_RELAX_EN); - pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, val16); + pcie_capability_clear_and_set_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_NOSNOOP_EN, + PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE | + PCI_EXP_DEVCTL_RELAX_EN); dma_mask = DMA_BIT_MASK(44); err = pci_set_dma_mask(pdev, dma_mask); @@ -9932,7 +9927,7 @@ static int niu_suspend(struct pci_dev *pdev, pm_message_t state) if (!netif_running(dev)) return 0; - flush_work_sync(&np->reset_task); + flush_work(&np->reset_task); niu_netif_stop(np); del_timer_sync(&np->timer); diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 967fe8cb476..c9c977bf02a 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -212,7 +212,6 @@ static void bigmac_clean_rings(struct bigmac *bp) static void bigmac_init_rings(struct bigmac *bp, int from_irq) { struct bmac_init_block *bb = bp->bmac_block; - struct net_device *dev = bp->dev; int i; gfp_t gfp_flags = GFP_KERNEL; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 1b173a6145d..b26cbda5efa 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -32,7 +32,7 @@ config TI_DAVINCI_EMAC config TI_DAVINCI_MDIO tristate "TI DaVinci MDIO Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) + depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX ) select PHYLIB ---help--- This driver supports TI's DaVinci MDIO module. @@ -42,7 +42,7 @@ config TI_DAVINCI_MDIO config TI_DAVINCI_CPDMA tristate "TI DaVinci CPDMA Support" - depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 ) + depends on ARM && ( ARCH_DAVINCI || ARCH_OMAP3 || SOC_AM33XX ) ---help--- This driver supports TI's DaVinci CPDMA dma engine. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 1e5d85b06e7..df55e240374 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -28,6 +28,9 @@ #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/pm_runtime.h> +#include <linux/of.h> +#include <linux/of_net.h> +#include <linux/of_device.h> #include <linux/platform_data/cpsw.h> @@ -383,6 +386,11 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, mac_control |= BIT(7); /* GIGABITEN */ if (phy->duplex) mac_control |= BIT(0); /* FULLDUPLEXEN */ + + /* set speed_in input in case RMII mode is used in 100Mbps */ + if (phy->speed == 100) + mac_control |= BIT(15); + *link = true; } else { mac_control = 0; @@ -709,6 +717,158 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->sliver = regs + data->sliver_reg_ofs; } +static int cpsw_probe_dt(struct cpsw_platform_data *data, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + struct device_node *slave_node; + int i = 0, ret; + u32 prop; + + if (!node) + return -EINVAL; + + if (of_property_read_u32(node, "slaves", &prop)) { + pr_err("Missing slaves property in the DT.\n"); + return -EINVAL; + } + data->slaves = prop; + + data->slave_data = kzalloc(sizeof(struct cpsw_slave_data) * + data->slaves, GFP_KERNEL); + if (!data->slave_data) { + pr_err("Could not allocate slave memory.\n"); + return -EINVAL; + } + + data->no_bd_ram = of_property_read_bool(node, "no_bd_ram"); + + if (of_property_read_u32(node, "cpdma_channels", &prop)) { + pr_err("Missing cpdma_channels property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->channels = prop; + + if (of_property_read_u32(node, "host_port_no", &prop)) { + pr_err("Missing host_port_no property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->host_port_num = prop; + + if (of_property_read_u32(node, "cpdma_reg_ofs", &prop)) { + pr_err("Missing cpdma_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpdma_reg_ofs = prop; + + if (of_property_read_u32(node, "cpdma_sram_ofs", &prop)) { + pr_err("Missing cpdma_sram_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->cpdma_sram_ofs = prop; + + if (of_property_read_u32(node, "ale_reg_ofs", &prop)) { + pr_err("Missing ale_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->ale_reg_ofs = prop; + + if (of_property_read_u32(node, "ale_entries", &prop)) { + pr_err("Missing ale_entries property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->ale_entries = prop; + + if (of_property_read_u32(node, "host_port_reg_ofs", &prop)) { + pr_err("Missing host_port_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->host_port_reg_ofs = prop; + + if (of_property_read_u32(node, "hw_stats_reg_ofs", &prop)) { + pr_err("Missing hw_stats_reg_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->hw_stats_reg_ofs = prop; + + if (of_property_read_u32(node, "bd_ram_ofs", &prop)) { + pr_err("Missing bd_ram_ofs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->bd_ram_ofs = prop; + + if (of_property_read_u32(node, "bd_ram_size", &prop)) { + pr_err("Missing bd_ram_size property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->bd_ram_size = prop; + + if (of_property_read_u32(node, "rx_descs", &prop)) { + pr_err("Missing rx_descs property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->rx_descs = prop; + + if (of_property_read_u32(node, "mac_control", &prop)) { + pr_err("Missing mac_control property in the DT.\n"); + ret = -EINVAL; + goto error_ret; + } + data->mac_control = prop; + + for_each_child_of_node(node, slave_node) { + struct cpsw_slave_data *slave_data = data->slave_data + i; + const char *phy_id = NULL; + const void *mac_addr = NULL; + + if (of_property_read_string(slave_node, "phy_id", &phy_id)) { + pr_err("Missing slave[%d] phy_id property\n", i); + ret = -EINVAL; + goto error_ret; + } + slave_data->phy_id = phy_id; + + if (of_property_read_u32(slave_node, "slave_reg_ofs", &prop)) { + pr_err("Missing slave[%d] slave_reg_ofs property\n", i); + ret = -EINVAL; + goto error_ret; + } + slave_data->slave_reg_ofs = prop; + + if (of_property_read_u32(slave_node, "sliver_reg_ofs", + &prop)) { + pr_err("Missing slave[%d] sliver_reg_ofs property\n", + i); + ret = -EINVAL; + goto error_ret; + } + slave_data->sliver_reg_ofs = prop; + + mac_addr = of_get_mac_address(slave_node); + if (mac_addr) + memcpy(slave_data->mac_addr, mac_addr, ETH_ALEN); + + i++; + } + + return 0; + +error_ret: + kfree(data->slave_data); + return ret; +} + static int __devinit cpsw_probe(struct platform_device *pdev) { struct cpsw_platform_data *data = pdev->dev.platform_data; @@ -720,11 +880,6 @@ static int __devinit cpsw_probe(struct platform_device *pdev) struct resource *res; int ret = 0, i, k = 0; - if (!data) { - pr_err("platform data missing\n"); - return -ENODEV; - } - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); if (!ndev) { pr_err("error allocating net_device\n"); @@ -734,13 +889,19 @@ static int __devinit cpsw_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); spin_lock_init(&priv->lock); - priv->data = *data; priv->pdev = pdev; priv->ndev = ndev; priv->dev = &ndev->dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->rx_packet_max = max(rx_packet_max, 128); + if (cpsw_probe_dt(&priv->data, pdev)) { + pr_err("cpsw: platform data missing\n"); + ret = -ENODEV; + goto clean_ndev_ret; + } + data = &priv->data; + if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); pr_info("Detected MACID = %pM", priv->mac_addr); @@ -996,11 +1157,17 @@ static const struct dev_pm_ops cpsw_pm_ops = { .resume = cpsw_resume, }; +static const struct of_device_id cpsw_of_mtable[] = { + { .compatible = "ti,cpsw", }, + { /* sentinel */ }, +}; + static struct platform_driver cpsw_driver = { .driver = { .name = "cpsw", .owner = THIS_MODULE, .pm = &cpsw_pm_ops, + .of_match_table = of_match_ptr(cpsw_of_mtable), }, .probe = cpsw_probe, .remove = __devexit_p(cpsw_remove), diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index a9ca4a03d31..51a96dbee9a 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -36,6 +36,8 @@ #include <linux/io.h> #include <linux/pm_runtime.h> #include <linux/davinci_emac.h> +#include <linux/of.h> +#include <linux/of_device.h> /* * This timeout definition is a worst-case ultra defensive measure against @@ -289,6 +291,25 @@ static int davinci_mdio_write(struct mii_bus *bus, int phy_id, return 0; } +static int davinci_mdio_probe_dt(struct mdio_platform_data *data, + struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + u32 prop; + + if (!node) + return -EINVAL; + + if (of_property_read_u32(node, "bus_freq", &prop)) { + pr_err("Missing bus_freq property in the DT.\n"); + return -EINVAL; + } + data->bus_freq = prop; + + return 0; +} + + static int __devinit davinci_mdio_probe(struct platform_device *pdev) { struct mdio_platform_data *pdata = pdev->dev.platform_data; @@ -304,8 +325,6 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) return -ENOMEM; } - data->pdata = pdata ? (*pdata) : default_pdata; - data->bus = mdiobus_alloc(); if (!data->bus) { dev_err(dev, "failed to alloc mii bus\n"); @@ -313,14 +332,22 @@ static int __devinit davinci_mdio_probe(struct platform_device *pdev) goto bail_out; } + if (dev->of_node) { + if (davinci_mdio_probe_dt(&data->pdata, pdev)) + data->pdata = default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + } else { + data->pdata = pdata ? (*pdata) : default_pdata; + snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + } + data->bus->name = dev_name(dev); data->bus->read = davinci_mdio_read, data->bus->write = davinci_mdio_write, data->bus->reset = davinci_mdio_reset, data->bus->parent = dev; data->bus->priv = data; - snprintf(data->bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -456,11 +483,17 @@ static const struct dev_pm_ops davinci_mdio_pm_ops = { .resume = davinci_mdio_resume, }; +static const struct of_device_id davinci_mdio_of_mtable[] = { + { .compatible = "ti,davinci_mdio", }, + { /* sentinel */ }, +}; + static struct platform_driver davinci_mdio_driver = { .driver = { .name = "davinci_mdio", .owner = THIS_MODULE, .pm = &davinci_mdio_pm_ops, + .of_match_table = of_match_ptr(davinci_mdio_of_mtable), }, .probe = davinci_mdio_probe, .remove = __devexit_p(davinci_mdio_remove), diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 277c93e9ff4..8fa947a2d92 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1359,7 +1359,6 @@ static int tsi108_open(struct net_device *dev) } data->rxskbs[i] = skb; - data->rxskbs[i] = skb; data->rxring[i].buf0 = virt_to_phys(data->rxskbs[i]->data); data->rxring[i].misc = TSI108_RX_OWN | TSI108_RX_INT; } diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index a5826a3111a..2c08bf6e7bf 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -637,8 +637,7 @@ static int __devinit w5100_hw_probe(struct platform_device *pdev) if (data && is_valid_ether_addr(data->mac_addr)) { memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN); } else { - eth_random_addr(ndev->dev_addr); - ndev->addr_assign_type |= NET_ADDR_RANDOM; + eth_hw_addr_random(ndev); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c index bdd8891c215..88943d90c76 100644 --- a/drivers/net/ethernet/wiznet/w5300.c +++ b/drivers/net/ethernet/wiznet/w5300.c @@ -557,8 +557,7 @@ static int __devinit w5300_hw_probe(struct platform_device *pdev) if (data && is_valid_ether_addr(data->mac_addr)) { memcpy(ndev->dev_addr, data->mac_addr, ETH_ALEN); } else { - eth_random_addr(ndev->dev_addr); - ndev->addr_assign_type |= NET_ADDR_RANDOM; + eth_hw_addr_random(ndev); } mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); |