diff options
-rw-r--r-- | drivers/net/ethernet/sfc/Kconfig | 9 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/Makefile | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/bitfield.h | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ef10.c | 3043 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 34 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ethtool.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.c | 66 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.h | 8 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi_port.c | 21 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.h | 79 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/workarounds.h | 6 |
12 files changed, 3260 insertions, 22 deletions
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 4136ccc4a95..8b7152565c5 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -1,5 +1,5 @@ config SFC - tristate "Solarflare SFC4000/SFC9000-family support" + tristate "Solarflare SFC4000/SFC9000/SFC9100-family support" depends on PCI select MDIO select CRC32 @@ -8,12 +8,13 @@ config SFC select PTP_1588_CLOCK ---help--- This driver supports 10-gigabit Ethernet cards based on - the Solarflare SFC4000 and SFC9000-family controllers. + the Solarflare SFC4000, SFC9000-family and SFC9100-family + controllers. To compile this driver as a module, choose M here. The module will be called sfc. config SFC_MTD - bool "Solarflare SFC4000/SFC9000-family MTD support" + bool "Solarflare SFC4000/SFC9000/SFC9100-family MTD support" depends on SFC && MTD && !(SFC=y && MTD=m) default y ---help--- @@ -21,7 +22,7 @@ config SFC_MTD (e.g. /dev/mtd1). This is required to update the firmware or the boot configuration under Linux. config SFC_MCDI_MON - bool "Solarflare SFC9000-family hwmon support" + bool "Solarflare SFC9000/SFC9100-family hwmon support" depends on SFC && HWMON && !(SFC=y && HWMON=m) default y ---help--- diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index a61272661a7..3a83c0dca8e 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -1,5 +1,5 @@ -sfc-y += efx.o nic.o farch.o falcon.o siena.o tx.o rx.o \ - selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ +sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \ + rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ mcdi.o mcdi_port.o mcdi_mon.o ptp.o sfc-$(CONFIG_SFC_MTD) += mtd.o diff --git a/drivers/net/ethernet/sfc/bitfield.h b/drivers/net/ethernet/sfc/bitfield.h index 5400a33f254..f45b0db9469 100644 --- a/drivers/net/ethernet/sfc/bitfield.h +++ b/drivers/net/ethernet/sfc/bitfield.h @@ -29,6 +29,10 @@ /* Lowest bit numbers and widths */ #define EFX_DUMMY_FIELD_LBN 0 #define EFX_DUMMY_FIELD_WIDTH 0 +#define EFX_WORD_0_LBN 0 +#define EFX_WORD_0_WIDTH 16 +#define EFX_WORD_1_LBN 16 +#define EFX_WORD_1_WIDTH 16 #define EFX_DWORD_0_LBN 0 #define EFX_DWORD_0_WIDTH 32 #define EFX_DWORD_1_LBN 32 diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c new file mode 100644 index 00000000000..5f42313b496 --- /dev/null +++ b/drivers/net/ethernet/sfc/ef10.c @@ -0,0 +1,3043 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2012-2013 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include "net_driver.h" +#include "ef10_regs.h" +#include "io.h" +#include "mcdi.h" +#include "mcdi_pcol.h" +#include "nic.h" +#include "workarounds.h" +#include <linux/in.h> +#include <linux/jhash.h> +#include <linux/wait.h> +#include <linux/workqueue.h> + +/* Hardware control for EF10 architecture including 'Huntington'. */ + +#define EFX_EF10_DRVGEN_EV 7 +enum { + EFX_EF10_TEST = 1, + EFX_EF10_REFILL, +}; + +/* The reserved RSS context value */ +#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff + +/* The filter table(s) are managed by firmware and we have write-only + * access. When removing filters we must identify them to the + * firmware by a 64-bit handle, but this is too wide for Linux kernel + * interfaces (32-bit for RX NFC, 16-bit for RFS). Also, we need to + * be able to tell in advance whether a requested insertion will + * replace an existing filter. Therefore we maintain a software hash + * table, which should be at least as large as the hardware hash + * table. + * + * Huntington has a single 8K filter table shared between all filter + * types and both ports. + */ +#define HUNT_FILTER_TBL_ROWS 8192 + +struct efx_ef10_filter_table { +/* The RX match field masks supported by this fw & hw, in order of priority */ + enum efx_filter_match_flags rx_match_flags[ + MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM]; + unsigned int rx_match_count; + + struct { + unsigned long spec; /* pointer to spec plus flag bits */ +/* BUSY flag indicates that an update is in progress. STACK_OLD is + * used to mark and sweep stack-owned MAC filters. + */ +#define EFX_EF10_FILTER_FLAG_BUSY 1UL +#define EFX_EF10_FILTER_FLAG_STACK_OLD 2UL +#define EFX_EF10_FILTER_FLAGS 3UL + u64 handle; /* firmware handle */ + } *entry; + wait_queue_head_t waitq; +/* Shadow of net_device address lists, guarded by mac_lock */ +#define EFX_EF10_FILTER_STACK_UC_MAX 32 +#define EFX_EF10_FILTER_STACK_MC_MAX 256 + struct { + u8 addr[ETH_ALEN]; + u16 id; + } stack_uc_list[EFX_EF10_FILTER_STACK_UC_MAX], + stack_mc_list[EFX_EF10_FILTER_STACK_MC_MAX]; + int stack_uc_count; /* negative for PROMISC */ + int stack_mc_count; /* negative for PROMISC/ALLMULTI */ +}; + +/* An arbitrary search limit for the software hash table */ +#define EFX_EF10_FILTER_SEARCH_LIMIT 200 + +static void efx_ef10_rx_push_indir_table(struct efx_nic *efx); +static void efx_ef10_rx_free_indir_table(struct efx_nic *efx); +static void efx_ef10_filter_table_remove(struct efx_nic *efx); + +static int efx_ef10_get_warm_boot_count(struct efx_nic *efx) +{ + efx_dword_t reg; + + efx_readd(efx, ®, ER_DZ_BIU_MC_SFT_STATUS); + return EFX_DWORD_FIELD(reg, EFX_WORD_1) == 0xb007 ? + EFX_DWORD_FIELD(reg, EFX_WORD_0) : -EIO; +} + +static unsigned int efx_ef10_mem_map_size(struct efx_nic *efx) +{ + return resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]); +} + +static int efx_ef10_init_capabilities(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_CAPABILITIES_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_CAPABILITIES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + + if (outlen >= sizeof(outbuf)) { + nic_data->datapath_caps = + MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))) { + netif_err(efx, drv, efx->net_dev, + "Capabilities don't indicate TSO support.\n"); + return -ENODEV; + } + } + + return 0; +} + +static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN); + int rc; + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_CLOCK, NULL, 0, + outbuf, sizeof(outbuf), NULL); + if (rc) + return rc; + rc = MCDI_DWORD(outbuf, GET_CLOCK_OUT_SYS_FREQ); + return rc > 0 ? rc : -ERANGE; +} + +static int efx_ef10_get_mac_address(struct efx_nic *efx, u8 *mac_address) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN); + size_t outlen; + int rc; + + BUILD_BUG_ON(MC_CMD_GET_MAC_ADDRESSES_IN_LEN != 0); + + rc = efx_mcdi_rpc(efx, MC_CMD_GET_MAC_ADDRESSES, NULL, 0, + outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < MC_CMD_GET_MAC_ADDRESSES_OUT_LEN) + return -EIO; + + memcpy(mac_address, + MCDI_PTR(outbuf, GET_MAC_ADDRESSES_OUT_MAC_ADDR_BASE), ETH_ALEN); + return 0; +} + +static int efx_ef10_probe(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data; + int i, rc; + + /* We can have one VI for each 8K region. However we need + * multiple TX queues per channel. + */ + efx->max_channels = + min_t(unsigned int, + EFX_MAX_CHANNELS, + resource_size(&efx->pci_dev->resource[EFX_MEM_BAR]) / + (EFX_VI_PAGE_SIZE * EFX_TXQ_TYPES)); + BUG_ON(efx->max_channels == 0); + + nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL); + if (!nic_data) + return -ENOMEM; + efx->nic_data = nic_data; + + rc = efx_nic_alloc_buffer(efx, &nic_data->mcdi_buf, + 8 + MCDI_CTL_SDU_LEN_MAX_V2, GFP_KERNEL); + if (rc) + goto fail1; + + /* Get the MC's warm boot count. In case it's rebooting right + * now, be prepared to retry. + */ + i = 0; + for (;;) { + rc = efx_ef10_get_warm_boot_count(efx); + if (rc >= 0) + break; + if (++i == 5) + goto fail2; + ssleep(1); + } + nic_data->warm_boot_count = rc; + + nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + + /* In case we're recovering from a crash (kexec), we want to + * cancel any outstanding request by the previous user of this + * function. We send a special message using the least + * significant bits of the 'high' (doorbell) register. + */ + _efx_writed(efx, cpu_to_le32(1), ER_DZ_MC_DB_HWRD); + + rc = efx_mcdi_init(efx); + if (rc) + goto fail2; + + /* Reset (most) configuration for this function */ + rc = efx_mcdi_reset(efx, RESET_TYPE_ALL); + if (rc) + goto fail3; + + /* Enable event logging */ + rc = efx_mcdi_log_ctrl(efx, true, false, 0); + if (rc) + goto fail3; + + rc = efx_ef10_init_capabilities(efx); + if (rc < 0) + goto fail3; + + efx->rx_packet_len_offset = + ES_DZ_RX_PREFIX_PKTLEN_OFST - ES_DZ_RX_PREFIX_SIZE; + + if (!(nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_RX_PREFIX_LEN_14_LBN))) { + netif_err(efx, probe, efx->net_dev, + "current firmware does not support an RX prefix\n"); + rc = -ENODEV; + goto fail3; + } + + rc = efx_mcdi_port_get_number(efx); + if (rc < 0) + goto fail3; + efx->port_num = rc; + + rc = efx_ef10_get_mac_address(efx, efx->net_dev->perm_addr); + if (rc) + goto fail3; + + rc = efx_ef10_get_sysclk_freq(efx); + if (rc < 0) + goto fail3; + efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ + + /* Check whether firmware supports bug 35388 workaround */ + rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true); + if (rc == 0) + nic_data->workaround_35388 = true; + else if (rc != -ENOSYS && rc != -ENOENT) + goto fail3; + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 35388 is %sabled\n", + nic_data->workaround_35388 ? "en" : "dis"); + + rc = efx_mcdi_mon_probe(efx); + if (rc) + goto fail3; + + efx_ptp_probe(efx); + + return 0; + +fail3: + efx_mcdi_fini(efx); +fail2: + efx_nic_free_buffer(efx, &nic_data->mcdi_buf); +fail1: + kfree(nic_data); + efx->nic_data = NULL; + return rc; +} + +static int efx_ef10_free_vis(struct efx_nic *efx) +{ + int rc = efx_mcdi_rpc(efx, MC_CMD_FREE_VIS, NULL, 0, NULL, 0, NULL); + + /* -EALREADY means nothing to free, so ignore */ + if (rc == -EALREADY) + rc = 0; + return rc; +} + +static void efx_ef10_remove(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + efx_mcdi_mon_remove(efx); + + /* This needs to be after efx_ptp_remove_channel() with no filters */ + efx_ef10_rx_free_indir_table(efx); + + rc = efx_ef10_free_vis(efx); + WARN_ON(rc != 0); + + efx_mcdi_fini(efx); + efx_nic_free_buffer(efx, &nic_data->mcdi_buf); + kfree(nic_data); +} + +static int efx_ef10_alloc_vis(struct efx_nic *efx, + unsigned int min_vis, unsigned int max_vis) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis); + MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis); + rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + if (rc != 0) + return rc; + + if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN) + return -EIO; + + netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n", + MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE)); + + nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE); + nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT); + return 0; +} + +static int efx_ef10_dimension_resources(struct efx_nic *efx) +{ + unsigned int n_vis = + max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); + + return efx_ef10_alloc_vis(efx, n_vis, n_vis); +} + +static int efx_ef10_init_nic(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + if (nic_data->must_realloc_vis) { + /* We cannot let the number of VIs change now */ + rc = efx_ef10_alloc_vis(efx, nic_data->n_allocated_vis, + nic_data->n_allocated_vis); + if (rc) + return rc; + nic_data->must_realloc_vis = false; + } + + efx_ef10_rx_push_indir_table(efx); + return 0; +} + +static int efx_ef10_map_reset_flags(u32 *flags) +{ + enum { + EF10_RESET_PORT = ((ETH_RESET_MAC | ETH_RESET_PHY) << + ETH_RESET_SHARED_SHIFT), + EF10_RESET_MC = ((ETH_RESET_DMA | ETH_RESET_FILTER | + ETH_RESET_OFFLOAD | ETH_RESET_MAC | + ETH_RESET_PHY | ETH_RESET_MGMT) << + ETH_RESET_SHARED_SHIFT) + }; + + /* We assume for now that our PCI function is permitted to + * reset everything. + */ + + if ((*flags & EF10_RESET_MC) == EF10_RESET_MC) { + *flags &= ~EF10_RESET_MC; + return RESET_TYPE_WORLD; + } + + if ((*flags & EF10_RESET_PORT) == EF10_RESET_PORT) { + *flags &= ~EF10_RESET_PORT; + return RESET_TYPE_ALL; + } + + /* no invisible reset implemented */ + + return -EINVAL; +} + +#define EF10_DMA_STAT(ext_name, mcdi_name) \ + [EF10_STAT_ ## ext_name] = \ + { #ext_name, 64, 8 * MC_CMD_MAC_ ## mcdi_name } +#define EF10_DMA_INVIS_STAT(int_name, mcdi_name) \ + [EF10_STAT_ ## int_name] = \ + { NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name } +#define EF10_OTHER_STAT(ext_name) \ + [EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 } + +static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { + EF10_DMA_STAT(tx_bytes, TX_BYTES), + EF10_DMA_STAT(tx_packets, TX_PKTS), + EF10_DMA_STAT(tx_pause, TX_PAUSE_PKTS), + EF10_DMA_STAT(tx_control, TX_CONTROL_PKTS), + EF10_DMA_STAT(tx_unicast, TX_UNICAST_PKTS), + EF10_DMA_STAT(tx_multicast, TX_MULTICAST_PKTS), + EF10_DMA_STAT(tx_broadcast, TX_BROADCAST_PKTS), + EF10_DMA_STAT(tx_lt64, TX_LT64_PKTS), + EF10_DMA_STAT(tx_64, TX_64_PKTS), + EF10_DMA_STAT(tx_65_to_127, TX_65_TO_127_PKTS), + EF10_DMA_STAT(tx_128_to_255, TX_128_TO_255_PKTS), + EF10_DMA_STAT(tx_256_to_511, TX_256_TO_511_PKTS), + EF10_DMA_STAT(tx_512_to_1023, TX_512_TO_1023_PKTS), + EF10_DMA_STAT(tx_1024_to_15xx, TX_1024_TO_15XX_PKTS), + EF10_DMA_STAT(tx_15xx_to_jumbo, TX_15XX_TO_JUMBO_PKTS), + EF10_DMA_STAT(rx_bytes, RX_BYTES), + EF10_DMA_INVIS_STAT(rx_bytes_minus_good_bytes, RX_BAD_BYTES), + EF10_OTHER_STAT(rx_good_bytes), + EF10_OTHER_STAT(rx_bad_bytes), + EF10_DMA_STAT(rx_packets, RX_PKTS), + EF10_DMA_STAT(rx_good, RX_GOOD_PKTS), + EF10_DMA_STAT(rx_bad, RX_BAD_FCS_PKTS), + EF10_DMA_STAT(rx_pause, RX_PAUSE_PKTS), + EF10_DMA_STAT(rx_control, RX_CONTROL_PKTS), + EF10_DMA_STAT(rx_unicast, RX_UNICAST_PKTS), + EF10_DMA_STAT(rx_multicast, RX_MULTICAST_PKTS), + EF10_DMA_STAT(rx_broadcast, RX_BROADCAST_PKTS), + EF10_DMA_STAT(rx_lt64, RX_UNDERSIZE_PKTS), + EF10_DMA_STAT(rx_64, RX_64_PKTS), + EF10_DMA_STAT(rx_65_to_127, RX_65_TO_127_PKTS), + EF10_DMA_STAT(rx_128_to_255, RX_128_TO_255_PKTS), + EF10_DMA_STAT(rx_256_to_511, RX_256_TO_511_PKTS), + EF10_DMA_STAT(rx_512_to_1023, RX_512_TO_1023_PKTS), + EF10_DMA_STAT(rx_1024_to_15xx, RX_1024_TO_15XX_PKTS), + EF10_DMA_STAT(rx_15xx_to_jumbo, RX_15XX_TO_JUMBO_PKTS), + EF10_DMA_STAT(rx_gtjumbo, RX_GTJUMBO_PKTS), + EF10_DMA_STAT(rx_bad_gtjumbo, RX_JABBER_PKTS), + EF10_DMA_STAT(rx_overflow, RX_OVERFLOW_PKTS), + EF10_DMA_STAT(rx_align_error, RX_ALIGN_ERROR_PKTS), + EF10_DMA_STAT(rx_length_error, RX_LENGTH_ERROR_PKTS), + EF10_DMA_STAT(rx_nodesc_drops, RX_NODESC_DROPS), +}; + +#define HUNT_COMMON_STAT_MASK ((1ULL << EF10_STAT_tx_bytes) | \ + (1ULL << EF10_STAT_tx_packets) | \ + (1ULL << EF10_STAT_tx_pause) | \ + (1ULL << EF10_STAT_tx_unicast) | \ + (1ULL << EF10_STAT_tx_multicast) | \ + (1ULL << EF10_STAT_tx_broadcast) | \ + (1ULL << EF10_STAT_rx_bytes) | \ + (1ULL << EF10_STAT_rx_bytes_minus_good_bytes) | \ + (1ULL << EF10_STAT_rx_good_bytes) | \ + (1ULL << EF10_STAT_rx_bad_bytes) | \ + (1ULL << EF10_STAT_rx_packets) | \ + (1ULL << EF10_STAT_rx_good) | \ + (1ULL << EF10_STAT_rx_bad) | \ + (1ULL << EF10_STAT_rx_pause) | \ + (1ULL << EF10_STAT_rx_control) | \ + (1ULL << EF10_STAT_rx_unicast) | \ + (1ULL << EF10_STAT_rx_multicast) | \ + (1ULL << EF10_STAT_rx_broadcast) | \ + (1ULL << EF10_STAT_rx_lt64) | \ + (1ULL << EF10_STAT_rx_64) | \ + (1ULL << EF10_STAT_rx_65_to_127) | \ + (1ULL << EF10_STAT_rx_128_to_255) | \ + (1ULL << EF10_STAT_rx_256_to_511) | \ + (1ULL << EF10_STAT_rx_512_to_1023) | \ + (1ULL << EF10_STAT_rx_1024_to_15xx) | \ + (1ULL << EF10_STAT_rx_15xx_to_jumbo) | \ + (1ULL << EF10_STAT_rx_gtjumbo) | \ + (1ULL << EF10_STAT_rx_bad_gtjumbo) | \ + (1ULL << EF10_STAT_rx_overflow) | \ + (1ULL << EF10_STAT_rx_nodesc_drops)) + +/* These statistics are only provided by the 10G MAC. For a 10G/40G + * switchable port we do not expose these because they might not + * include all the packets they should. + */ +#define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_tx_control) | \ + (1ULL << EF10_STAT_tx_lt64) | \ + (1ULL << EF10_STAT_tx_64) | \ + (1ULL << EF10_STAT_tx_65_to_127) | \ + (1ULL << EF10_STAT_tx_128_to_255) | \ + (1ULL << EF10_STAT_tx_256_to_511) | \ + (1ULL << EF10_STAT_tx_512_to_1023) | \ + (1ULL << EF10_STAT_tx_1024_to_15xx) | \ + (1ULL << EF10_STAT_tx_15xx_to_jumbo)) + +/* These statistics are only provided by the 40G MAC. For a 10G/40G + * switchable port we do expose these because the errors will otherwise + * be silent. + */ +#define HUNT_40G_EXTRA_STAT_MASK ((1ULL << EF10_STAT_rx_align_error) | \ + (1ULL << EF10_STAT_rx_length_error)) + +#if BITS_PER_LONG == 64 +#define STAT_MASK_BITMAP(bits) (bits) +#else +#define STAT_MASK_BITMAP(bits) (bits) & 0xffffffff, (bits) >> 32 +#endif + +static const unsigned long *efx_ef10_stat_mask(struct efx_nic *efx) +{ + static const unsigned long hunt_40g_stat_mask[] = { + STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | + HUNT_40G_EXTRA_STAT_MASK) + }; + static const unsigned long hunt_10g_only_stat_mask[] = { + STAT_MASK_BITMAP(HUNT_COMMON_STAT_MASK | + HUNT_10G_ONLY_STAT_MASK) + }; + u32 port_caps = efx_mcdi_phy_get_caps(efx); + + if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + return hunt_40g_stat_mask; + else + return hunt_10g_only_stat_mask; +} + +static size_t efx_ef10_describe_stats(struct efx_nic *efx, u8 *names) +{ + return efx_nic_describe_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, + efx_ef10_stat_mask(efx), names); +} + +static int efx_ef10_try_update_nic_stats(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const unsigned long *stats_mask = efx_ef10_stat_mask(efx); + __le64 generation_start, generation_end; + u64 *stats = nic_data->stats; + __le64 *dma_stats; + + dma_stats = efx->stats_buffer.addr; + nic_data = efx->nic_data; + + generation_end = dma_stats[MC_CMD_MAC_GENERATION_END]; + if (generation_end == EFX_MC_STATS_GENERATION_INVALID) + return 0; + rmb(); + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, stats_mask, + stats, efx->stats_buffer.addr, false); + generation_start = dma_stats[MC_CMD_MAC_GENERATION_START]; + if (generation_end != generation_start) + return -EAGAIN; + + /* Update derived statistics */ + stats[EF10_STAT_rx_good_bytes] = + stats[EF10_STAT_rx_bytes] - + stats[EF10_STAT_rx_bytes_minus_good_bytes]; + efx_update_diff_stat(&stats[EF10_STAT_rx_bad_bytes], + stats[EF10_STAT_rx_bytes_minus_good_bytes]); + + return 0; +} + + +static size_t efx_ef10_update_stats(struct efx_nic *efx, u64 *full_stats, + struct rtnl_link_stats64 *core_stats) +{ + const unsigned long *mask = efx_ef10_stat_mask(efx); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u64 *stats = nic_data->stats; + size_t stats_count = 0, index; + int retry; + + /* If we're unlucky enough to read statistics during the DMA, wait + * up to 10ms for it to finish (typically takes <500us) + */ + for (retry = 0; retry < 100; ++retry) { + if (efx_ef10_try_update_nic_stats(efx) == 0) + break; + udelay(100); + } + + if (full_stats) { + for_each_set_bit(index, mask, EF10_STAT_COUNT) { + if (efx_ef10_stat_desc[index].name) { + *full_stats++ = stats[index]; + ++stats_count; + } + } + } + + if (core_stats) { + core_stats->rx_packets = stats[EF10_STAT_rx_packets]; + core_stats->tx_packets = stats[EF10_STAT_tx_packets]; + core_stats->rx_bytes = stats[EF10_STAT_rx_bytes]; + core_stats->tx_bytes = stats[EF10_STAT_tx_bytes]; + core_stats->rx_dropped = stats[EF10_STAT_rx_nodesc_drops]; + core_stats->multicast = stats[EF10_STAT_rx_multicast]; + core_stats->rx_length_errors = + stats[EF10_STAT_rx_gtjumbo] + + stats[EF10_STAT_rx_length_error]; + core_stats->rx_crc_errors = stats[EF10_STAT_rx_bad]; + core_stats->rx_frame_errors = stats[EF10_STAT_rx_align_error]; + core_stats->rx_fifo_errors = stats[EF10_STAT_rx_overflow]; + core_stats->rx_errors = (core_stats->rx_length_errors + + core_stats->rx_crc_errors + + core_stats->rx_frame_errors); + } + + return stats_count; +} + +static void efx_ef10_push_irq_moderation(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned int mode, value; + efx_dword_t timer_cmd; + + if (channel->irq_moderation) { + mode = 3; + value = channel->irq_moderation - 1; + } else { + mode = 0; + value = 0; + } + + if (EFX_EF10_WORKAROUND_35388(efx)) { + EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS, + EFE_DD_EVQ_IND_TIMER_FLAGS, + ERF_DD_EVQ_IND_TIMER_MODE, mode, + ERF_DD_EVQ_IND_TIMER_VAL, value); + efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT, + channel->channel); + } else { + EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode, + ERF_DZ_TC_TIMER_VAL, value); + efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR, + channel->channel); + } +} + +static void efx_ef10_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol) +{ + wol->supported = 0; + wol->wolopts = 0; + memset(&wol->sopass, 0, sizeof(wol->sopass)); +} + +static int efx_ef10_set_wol(struct efx_nic *efx, u32 type) +{ + if (type != 0) + return -EINVAL; + return 0; +} + +static void efx_ef10_mcdi_request(struct efx_nic *efx, + const efx_dword_t *hdr, size_t hdr_len, + const efx_dword_t *sdu, size_t sdu_len) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u8 *pdu = nic_data->mcdi_buf.addr; + + memcpy(pdu, hdr, hdr_len); + memcpy(pdu + hdr_len, sdu, sdu_len); + wmb(); + + /* The hardware provides 'low' and 'high' (doorbell) registers + * for passing the 64-bit address of an MCDI request to + * firmware. However the dwords are swapped by firmware. The + * least significant bits of the doorbell are then 0 for all + * MCDI requests due to alignment. + */ + _efx_writed(efx, cpu_to_le32((u64)nic_data->mcdi_buf.dma_addr >> 32), + ER_DZ_MC_DB_LWRD); + _efx_writed(efx, cpu_to_le32((u32)nic_data->mcdi_buf.dma_addr), + ER_DZ_MC_DB_HWRD); +} + +static bool efx_ef10_mcdi_poll_response(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const efx_dword_t hdr = *(const efx_dword_t *)nic_data->mcdi_buf.addr; + + rmb(); + return EFX_DWORD_FIELD(hdr, MCDI_HEADER_RESPONSE); +} + +static void +efx_ef10_mcdi_read_response(struct efx_nic *efx, efx_dword_t *outbuf, + size_t offset, size_t outlen) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + const u8 *pdu = nic_data->mcdi_buf.addr; + + memcpy(outbuf, pdu + offset, outlen); +} + +static int efx_ef10_mcdi_poll_reboot(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + int rc; + + rc = efx_ef10_get_warm_boot_count(efx); + if (rc < 0) { + /* The firmware is presumably in the process of + * rebooting. However, we are supposed to report each + * reboot just once, so we must only do that once we + * can read and store the updated warm boot count. + */ + return 0; + } + + if (rc == nic_data->warm_boot_count) + return 0; + + nic_data->warm_boot_count = rc; + + /* All our allocations have been reset */ + nic_data->must_realloc_vis = true; + nic_data->must_restore_filters = true; + nic_data->rx_rss_context = EFX_EF10_RSS_CONTEXT_INVALID; + + return -EIO; +} + +/* Handle an MSI interrupt + * + * Handle an MSI hardware interrupt. This routine schedules event + * queue processing. No interrupt acknowledgement cycle is necessary. + * Also, we never need to check that the interrupt is for us, since + * MSI interrupts cannot be shared. + */ +static irqreturn_t efx_ef10_msi_interrupt(int irq, void *dev_id) +{ + struct efx_msi_context *context = dev_id; + struct efx_nic *efx = context->efx; + + netif_vdbg(efx, intr, efx->net_dev, + "IRQ %d on CPU %d\n", irq, raw_smp_processor_id()); + + if (likely(ACCESS_ONCE(efx->irq_soft_enabled))) { + /* Note test interrupts */ + if (context->index == efx->irq_level) + efx->last_irq_cpu = raw_smp_processor_id(); + + /* Schedule processing of the channel */ + efx_schedule_channel_irq(efx->channel[context->index]); + } + + return IRQ_HANDLED; +} + +static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) +{ + struct efx_nic *efx = dev_id; + bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled); + struct efx_channel *channel; + efx_dword_t reg; + u32 queues; + + /* Read the ISR which also ACKs the interrupts */ + efx_readd(efx, ®, ER_DZ_BIU_INT_ISR); + queues = EFX_DWORD_FIELD(reg, ERF_DZ_ISR_REG); + + if (queues == 0) + return IRQ_NONE; + + if (likely(soft_enabled)) { + /* Note test interrupts */ + if (queues & (1U << efx->irq_level)) + efx->last_irq_cpu = raw_smp_processor_id(); + + efx_for_each_channel(channel, efx) { + if (queues & 1) + efx_schedule_channel_irq(channel); + queues >>= 1; + } + } + + netif_vdbg(efx, intr, efx->net_dev, + "IRQ %d on CPU %d status " EFX_DWORD_FMT "\n", + irq, raw_smp_processor_id(), EFX_DWORD_VAL(reg)); + + return IRQ_HANDLED; +} + +static void efx_ef10_irq_test_generate(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN); + + BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0); + + MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level); + (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, + inbuf, sizeof(inbuf), NULL, 0, NULL); +} + +static int efx_ef10_tx_probe(struct efx_tx_queue *tx_queue) +{ + return efx_nic_alloc_buffer(tx_queue->efx, &tx_queue->txd.buf, + (tx_queue->ptr_mask + 1) * + sizeof(efx_qword_t), + GFP_KERNEL); +} + +/* This writes to the TX_DESC_WPTR and also pushes data */ +static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue, + const efx_qword_t *txd) +{ + unsigned int write_ptr; + efx_oword_t reg; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_OWORD_1(reg, ERF_DZ_TX_DESC_WPTR, write_ptr); + reg.qword[0] = *txd; + efx_writeo_page(tx_queue->efx, ®, + ER_DZ_TX_DESC_UPD, tx_queue->queue); +} + +static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_TXQ_OUT_LEN); + bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE; + struct efx_channel *channel = tx_queue->channel; + struct efx_nic *efx = tx_queue->efx; + size_t inlen, outlen; + dma_addr_t dma_addr; + efx_qword_t *txd; + int rc; + int i; + + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); + MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); + + dma_addr = tx_queue->txd.buf.dma_addr; + + netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n", + tx_queue->queue, entries, (u64)dma_addr); + + for (i = 0; i < entries; ++i) { + MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr); + dma_addr += EFX_BUF_SIZE; + } + + inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); + + rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + outbuf, sizeof(outbuf), &outlen); + if (rc) + goto fail; + + /* A previous user of this TX queue might have set us up the + * bomb by writing a descriptor to the TX push collector but + * not the doorbell. (Each collector belongs to a port, not a + * queue or function, so cannot easily be reset.) We must + * attempt to push a no-op descriptor in its place. + */ + tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION; + tx_queue->insert_count = 1; + txd = efx_tx_desc(tx_queue, 0); + EFX_POPULATE_QWORD_4(*txd, + ESF_DZ_TX_DESC_IS_OPT, true, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_CRC_CSUM, + ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload, + ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); + tx_queue->write_count = 1; + wmb(); + efx_ef10_push_tx_desc(tx_queue, txd); + + return; + +fail: + WARN_ON(true); + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_FINI_TXQ_OUT_LEN); + struct efx_nic *efx = tx_queue->efx; + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE, + tx_queue->queue); + + rc = efx_mcdi_rpc(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf), + outbuf, sizeof(outbuf), &outlen); + + if (rc && rc != -EALREADY) + goto fail; + + return; + +fail: + netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); +} + +static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue) +{ + efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf); +} + +/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ +static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue) +{ + unsigned int write_ptr; + efx_dword_t reg; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_DWORD_1(reg, ERF_DZ_TX_DESC_WPTR_DWORD, write_ptr); + efx_writed_page(tx_queue->efx, ®, + ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue); +} + +static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) +{ + unsigned int old_write_count = tx_queue->write_count; + struct efx_tx_buffer *buffer; + unsigned int write_ptr; + efx_qword_t *txd; + + BUG_ON(tx_queue->write_count == tx_queue->insert_count); + + do { + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + buffer = &tx_queue->buffer[write_ptr]; + txd = efx_tx_desc(tx_queue, write_ptr); + ++tx_queue->write_count; + + /* Create TX descriptor ring entry */ + if (buffer->flags & EFX_TX_BUF_OPTION) { + *txd = buffer->option; + } else { + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); + EFX_POPULATE_QWORD_3( + *txd, + ESF_DZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, + ESF_DZ_TX_KER_BYTE_CNT, buffer->len, + ESF_DZ_TX_KER_BUF_ADDR, buffer->dma_addr); + } + } while (tx_queue->write_count != tx_queue->insert_count); + + wmb(); /* Ensure descriptors are written before they are fetched */ + + if (efx_nic_may_push_tx_desc(tx_queue, old_write_count)) { + txd = efx_tx_desc(tx_queue, + old_write_count & tx_queue->ptr_mask); + efx_ef10_push_tx_desc(tx_queue, txd); + ++tx_queue->pushes; + } else { + efx_ef1 |