diff options
Diffstat (limited to 'drivers/net/sfc/efx.c')
-rw-r--r-- | drivers/net/sfc/efx.c | 2208 |
1 files changed, 2208 insertions, 0 deletions
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c new file mode 100644 index 00000000000..59edcf793c1 --- /dev/null +++ b/drivers/net/sfc/efx.c @@ -0,0 +1,2208 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2008 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/delay.h> +#include <linux/notifier.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/in.h> +#include <linux/crc32.h> +#include <linux/ethtool.h> +#include "net_driver.h" +#include "gmii.h" +#include "ethtool.h" +#include "tx.h" +#include "rx.h" +#include "efx.h" +#include "mdio_10g.h" +#include "falcon.h" +#include "workarounds.h" +#include "mac.h" + +#define EFX_MAX_MTU (9 * 1024) + +/* RX slow fill workqueue. If memory allocation fails in the fast path, + * a work item is pushed onto this work queue to retry the allocation later, + * to avoid the NIC being starved of RX buffers. Since this is a per cpu + * workqueue, there is nothing to be gained in making it per NIC + */ +static struct workqueue_struct *refill_workqueue; + +/************************************************************************** + * + * Configurable values + * + *************************************************************************/ + +/* + * Enable large receive offload (LRO) aka soft segment reassembly (SSR) + * + * This sets the default for new devices. It can be controlled later + * using ethtool. + */ +static int lro = 1; +module_param(lro, int, 0644); +MODULE_PARM_DESC(lro, "Large receive offload acceleration"); + +/* + * Use separate channels for TX and RX events + * + * Set this to 1 to use separate channels for TX and RX. It allows us to + * apply a higher level of interrupt moderation to TX events. + * + * This is forced to 0 for MSI interrupt mode as the interrupt vector + * is not written + */ +static unsigned int separate_tx_and_rx_channels = 1; + +/* This is the weight assigned to each of the (per-channel) virtual + * NAPI devices. + */ +static int napi_weight = 64; + +/* This is the time (in jiffies) between invocations of the hardware + * monitor, which checks for known hardware bugs and resets the + * hardware and driver as necessary. + */ +unsigned int efx_monitor_interval = 1 * HZ; + +/* This controls whether or not the hardware monitor will trigger a + * reset when it detects an error condition. + */ +static unsigned int monitor_reset = 1; + +/* This controls whether or not the driver will initialise devices + * with invalid MAC addresses stored in the EEPROM or flash. If true, + * such devices will be initialised with a random locally-generated + * MAC address. This allows for loading the sfc_mtd driver to + * reprogram the flash, even if the flash contents (including the MAC + * address) have previously been erased. + */ +static unsigned int allow_bad_hwaddr; + +/* Initial interrupt moderation settings. They can be modified after + * module load with ethtool. + * + * The default for RX should strike a balance between increasing the + * round-trip latency and reducing overhead. + */ +static unsigned int rx_irq_mod_usec = 60; + +/* Initial interrupt moderation settings. They can be modified after + * module load with ethtool. + * + * This default is chosen to ensure that a 10G link does not go idle + * while a TX queue is stopped after it has become full. A queue is + * restarted when it drops below half full. The time this takes (assuming + * worst case 3 descriptors per packet and 1024 descriptors) is + * 512 / 3 * 1.2 = 205 usec. + */ +static unsigned int tx_irq_mod_usec = 150; + +/* This is the first interrupt mode to try out of: + * 0 => MSI-X + * 1 => MSI + * 2 => legacy + */ +static unsigned int interrupt_mode; + +/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), + * i.e. the number of CPUs among which we may distribute simultaneous + * interrupt handling. + * + * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. + * The default (0) means to assign an interrupt to each package (level II cache) + */ +static unsigned int rss_cpus; +module_param(rss_cpus, uint, 0444); +MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); + +/************************************************************************** + * + * Utility functions and prototypes + * + *************************************************************************/ +static void efx_remove_channel(struct efx_channel *channel); +static void efx_remove_port(struct efx_nic *efx); +static void efx_fini_napi(struct efx_nic *efx); +static void efx_fini_channels(struct efx_nic *efx); + +#define EFX_ASSERT_RESET_SERIALISED(efx) \ + do { \ + if ((efx->state == STATE_RUNNING) || \ + (efx->state == STATE_RESETTING)) \ + ASSERT_RTNL(); \ + } while (0) + +/************************************************************************** + * + * Event queue processing + * + *************************************************************************/ + +/* Process channel's event queue + * + * This function is responsible for processing the event queue of a + * single channel. The caller must guarantee that this function will + * never be concurrently called more than once on the same channel, + * though different channels may be being processed concurrently. + */ +static inline int efx_process_channel(struct efx_channel *channel, int rx_quota) +{ + int rxdmaqs; + struct efx_rx_queue *rx_queue; + + if (unlikely(channel->efx->reset_pending != RESET_TYPE_NONE || + !channel->enabled)) + return rx_quota; + + rxdmaqs = falcon_process_eventq(channel, &rx_quota); + + /* Deliver last RX packet. */ + if (channel->rx_pkt) { + __efx_rx_packet(channel, channel->rx_pkt, + channel->rx_pkt_csummed); + channel->rx_pkt = NULL; + } + + efx_flush_lro(channel); + efx_rx_strategy(channel); + + /* Refill descriptor rings as necessary */ + rx_queue = &channel->efx->rx_queue[0]; + while (rxdmaqs) { + if (rxdmaqs & 0x01) + efx_fast_push_rx_descriptors(rx_queue); + rx_queue++; + rxdmaqs >>= 1; + } + + return rx_quota; +} + +/* Mark channel as finished processing + * + * Note that since we will not receive further interrupts for this + * channel before we finish processing and call the eventq_read_ack() + * method, there is no need to use the interrupt hold-off timers. + */ +static inline void efx_channel_processed(struct efx_channel *channel) +{ + /* Write to EVQ_RPTR_REG. If a new event arrived in a race + * with finishing processing, a new interrupt will be raised. + */ + channel->work_pending = 0; + smp_wmb(); /* Ensure channel updated before any new interrupt. */ + falcon_eventq_read_ack(channel); +} + +/* NAPI poll handler + * + * NAPI guarantees serialisation of polls of the same device, which + * provides the guarantee required by efx_process_channel(). + */ +static int efx_poll(struct napi_struct *napi, int budget) +{ + struct efx_channel *channel = + container_of(napi, struct efx_channel, napi_str); + struct net_device *napi_dev = channel->napi_dev; + int unused; + int rx_packets; + + EFX_TRACE(channel->efx, "channel %d NAPI poll executing on CPU %d\n", + channel->channel, raw_smp_processor_id()); + + unused = efx_process_channel(channel, budget); + rx_packets = (budget - unused); + + if (rx_packets < budget) { + /* There is no race here; although napi_disable() will + * only wait for netif_rx_complete(), this isn't a problem + * since efx_channel_processed() will have no effect if + * interrupts have already been disabled. + */ + netif_rx_complete(napi_dev, napi); + efx_channel_processed(channel); + } + + return rx_packets; +} + +/* Process the eventq of the specified channel immediately on this CPU + * + * Disable hardware generated interrupts, wait for any existing + * processing to finish, then directly poll (and ack ) the eventq. + * Finally reenable NAPI and interrupts. + * + * Since we are touching interrupts the caller should hold the suspend lock + */ +void efx_process_channel_now(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + + BUG_ON(!channel->used_flags); + BUG_ON(!channel->enabled); + + /* Disable interrupts and wait for ISRs to complete */ + falcon_disable_interrupts(efx); + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + if (channel->has_interrupt && channel->irq) + synchronize_irq(channel->irq); + + /* Wait for any NAPI processing to complete */ + napi_disable(&channel->napi_str); + + /* Poll the channel */ + (void) efx_process_channel(channel, efx->type->evq_size); + + /* Ack the eventq. This may cause an interrupt to be generated + * when they are reenabled */ + efx_channel_processed(channel); + + napi_enable(&channel->napi_str); + falcon_enable_interrupts(efx); +} + +/* Create event queue + * Event queue memory allocations are done only once. If the channel + * is reset, the memory buffer will be reused; this guards against + * errors during channel reset and also simplifies interrupt handling. + */ +static int efx_probe_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d create event queue\n", channel->channel); + + return falcon_probe_eventq(channel); +} + +/* Prepare channel's event queue */ +static int efx_init_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d init event queue\n", channel->channel); + + channel->eventq_read_ptr = 0; + + return falcon_init_eventq(channel); +} + +static void efx_fini_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d fini event queue\n", channel->channel); + + falcon_fini_eventq(channel); +} + +static void efx_remove_eventq(struct efx_channel *channel) +{ + EFX_LOG(channel->efx, "chan %d remove event queue\n", channel->channel); + + falcon_remove_eventq(channel); +} + +/************************************************************************** + * + * Channel handling + * + *************************************************************************/ + +/* Setup per-NIC RX buffer parameters. + * Calculate the rx buffer allocation parameters required to support + * the current MTU, including padding for header alignment and overruns. + */ +static void efx_calc_rx_buffer_params(struct efx_nic *efx) +{ + unsigned int order, len; + + len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + + /* Calculate page-order */ + for (order = 0; ((1u << order) * PAGE_SIZE) < len; ++order) + ; + + efx->rx_buffer_len = len; + efx->rx_buffer_order = order; +} + +static int efx_probe_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int rc; + + EFX_LOG(channel->efx, "creating channel %d\n", channel->channel); + + rc = efx_probe_eventq(channel); + if (rc) + goto fail1; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_probe_tx_queue(tx_queue); + if (rc) + goto fail2; + } + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_probe_rx_queue(rx_queue); + if (rc) + goto fail3; + } + + channel->n_rx_frm_trunc = 0; + + return 0; + + fail3: + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + fail2: + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + fail1: + return rc; +} + + +/* Channels are shutdown and reinitialised whilst the NIC is running + * to propagate configuration changes (mtu, checksum offload), or + * to clear hardware error conditions + */ +static int efx_init_channels(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + struct efx_channel *channel; + int rc = 0; + + efx_calc_rx_buffer_params(efx); + + /* Initialise the channels */ + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "init chan %d\n", channel->channel); + + rc = efx_init_eventq(channel); + if (rc) + goto err; + + efx_for_each_channel_tx_queue(tx_queue, channel) { + rc = efx_init_tx_queue(tx_queue); + if (rc) + goto err; + } + + /* The rx buffer allocation strategy is MTU dependent */ + efx_rx_strategy(channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) { + rc = efx_init_rx_queue(rx_queue); + if (rc) + goto err; + } + + WARN_ON(channel->rx_pkt != NULL); + efx_rx_strategy(channel); + } + + return 0; + + err: + EFX_ERR(efx, "failed to initialise channel %d\n", + channel ? channel->channel : -1); + efx_fini_channels(efx); + return rc; +} + +/* This enables event queue processing and packet transmission. + * + * Note that this function is not allowed to fail, since that would + * introduce too much complexity into the suspend/resume path. + */ +static void efx_start_channel(struct efx_channel *channel) +{ + struct efx_rx_queue *rx_queue; + + EFX_LOG(channel->efx, "starting chan %d\n", channel->channel); + + if (!(channel->efx->net_dev->flags & IFF_UP)) + netif_napi_add(channel->napi_dev, &channel->napi_str, + efx_poll, napi_weight); + + channel->work_pending = 0; + channel->enabled = 1; + smp_wmb(); /* ensure channel updated before first interrupt */ + + napi_enable(&channel->napi_str); + + /* Load up RX descriptors */ + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fast_push_rx_descriptors(rx_queue); +} + +/* This disables event queue processing and packet transmission. + * This function does not guarantee that all queue processing + * (e.g. RX refill) is complete. + */ +static void efx_stop_channel(struct efx_channel *channel) +{ + struct efx_rx_queue *rx_queue; + + if (!channel->enabled) + return; + + EFX_LOG(channel->efx, "stop chan %d\n", channel->channel); + + channel->enabled = 0; + napi_disable(&channel->napi_str); + + /* Ensure that any worker threads have exited or will be no-ops */ + efx_for_each_channel_rx_queue(rx_queue, channel) { + spin_lock_bh(&rx_queue->add_lock); + spin_unlock_bh(&rx_queue->add_lock); + } +} + +static void efx_fini_channels(struct efx_nic *efx) +{ + struct efx_channel *channel; + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->port_enabled); + + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "shut down chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_fini_rx_queue(rx_queue); + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_fini_tx_queue(tx_queue); + } + + /* Do the event queues last so that we can handle flush events + * for all DMA queues. */ + efx_for_each_channel(channel, efx) { + EFX_LOG(channel->efx, "shut down evq %d\n", channel->channel); + + efx_fini_eventq(channel); + } +} + +static void efx_remove_channel(struct efx_channel *channel) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + + EFX_LOG(channel->efx, "destroy chan %d\n", channel->channel); + + efx_for_each_channel_rx_queue(rx_queue, channel) + efx_remove_rx_queue(rx_queue); + efx_for_each_channel_tx_queue(tx_queue, channel) + efx_remove_tx_queue(tx_queue); + efx_remove_eventq(channel); + + channel->used_flags = 0; +} + +void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue, int delay) +{ + queue_delayed_work(refill_workqueue, &rx_queue->work, delay); +} + +/************************************************************************** + * + * Port handling + * + **************************************************************************/ + +/* This ensures that the kernel is kept informed (via + * netif_carrier_on/off) of the link status, and also maintains the + * link status's stop on the port's TX queue. + */ +static void efx_link_status_changed(struct efx_nic *efx) +{ + int carrier_ok; + + /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure + * that no events are triggered between unregister_netdev() and the + * driver unloading. A more general condition is that NETDEV_CHANGE + * can only be generated between NETDEV_UP and NETDEV_DOWN */ + if (!netif_running(efx->net_dev)) + return; + + carrier_ok = netif_carrier_ok(efx->net_dev) ? 1 : 0; + if (efx->link_up != carrier_ok) { + efx->n_link_state_changes++; + + if (efx->link_up) + netif_carrier_on(efx->net_dev); + else + netif_carrier_off(efx->net_dev); + } + + /* Status message for kernel log */ + if (efx->link_up) { + struct mii_if_info *gmii = &efx->mii; + unsigned adv, lpa; + /* NONE here means direct XAUI from the controller, with no + * MDIO-attached device we can query. */ + if (efx->phy_type != PHY_TYPE_NONE) { + adv = gmii_advertised(gmii); + lpa = gmii_lpa(gmii); + } else { + lpa = GM_LPA_10000 | LPA_DUPLEX; + adv = lpa; + } + EFX_INFO(efx, "link up at %dMbps %s-duplex " + "(adv %04x lpa %04x) (MTU %d)%s\n", + (efx->link_options & GM_LPA_10000 ? 10000 : + (efx->link_options & GM_LPA_1000 ? 1000 : + (efx->link_options & GM_LPA_100 ? 100 : + 10))), + (efx->link_options & GM_LPA_DUPLEX ? + "full" : "half"), + adv, lpa, + efx->net_dev->mtu, + (efx->promiscuous ? " [PROMISC]" : "")); + } else { + EFX_INFO(efx, "link down\n"); + } + +} + +/* This call reinitialises the MAC to pick up new PHY settings. The + * caller must hold the mac_lock */ +static void __efx_reconfigure_port(struct efx_nic *efx) +{ + WARN_ON(!mutex_is_locked(&efx->mac_lock)); + + EFX_LOG(efx, "reconfiguring MAC from PHY settings on CPU %d\n", + raw_smp_processor_id()); + + falcon_reconfigure_xmac(efx); + + /* Inform kernel of loss/gain of carrier */ + efx_link_status_changed(efx); +} + +/* Reinitialise the MAC to pick up new PHY settings, even if the port is + * disabled. */ +void efx_reconfigure_port(struct efx_nic *efx) +{ + EFX_ASSERT_RESET_SERIALISED(efx); + + mutex_lock(&efx->mac_lock); + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +/* Asynchronous efx_reconfigure_port work item. To speed up efx_flush_all() + * we don't efx_reconfigure_port() if the port is disabled. Care is taken + * in efx_stop_all() and efx_start_port() to prevent PHY events being lost */ +static void efx_reconfigure_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + reconfigure_work); + + mutex_lock(&efx->mac_lock); + if (efx->port_enabled) + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +static int efx_probe_port(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "create port\n"); + + /* Connect up MAC/PHY operations table and read MAC address */ + rc = falcon_probe_port(efx); + if (rc) + goto err; + + /* Sanity check MAC address */ + if (is_valid_ether_addr(efx->mac_address)) { + memcpy(efx->net_dev->dev_addr, efx->mac_address, ETH_ALEN); + } else { + DECLARE_MAC_BUF(mac); + + EFX_ERR(efx, "invalid MAC address %s\n", + print_mac(mac, efx->mac_address)); + if (!allow_bad_hwaddr) { + rc = -EINVAL; + goto err; + } + random_ether_addr(efx->net_dev->dev_addr); + EFX_INFO(efx, "using locally-generated MAC %s\n", + print_mac(mac, efx->net_dev->dev_addr)); + } + + return 0; + + err: + efx_remove_port(efx); + return rc; +} + +static int efx_init_port(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "init port\n"); + + /* Initialise the MAC and PHY */ + rc = falcon_init_xmac(efx); + if (rc) + return rc; + + efx->port_initialized = 1; + + /* Reconfigure port to program MAC registers */ + falcon_reconfigure_xmac(efx); + + return 0; +} + +/* Allow efx_reconfigure_port() to be scheduled, and close the window + * between efx_stop_port and efx_flush_all whereby a previously scheduled + * efx_reconfigure_port() may have been cancelled */ +static void efx_start_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "start port\n"); + BUG_ON(efx->port_enabled); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = 1; + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); +} + +/* Prevent efx_reconfigure_work and efx_monitor() from executing, and + * efx_set_multicast_list() from scheduling efx_reconfigure_work. + * efx_reconfigure_work can still be scheduled via NAPI processing + * until efx_flush_all() is called */ +static void efx_stop_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "stop port\n"); + + mutex_lock(&efx->mac_lock); + efx->port_enabled = 0; + mutex_unlock(&efx->mac_lock); + + /* Serialise against efx_set_multicast_list() */ + if (NET_DEV_REGISTERED(efx)) { + netif_tx_lock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); + } +} + +static void efx_fini_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "shut down port\n"); + + if (!efx->port_initialized) + return; + + falcon_fini_xmac(efx); + efx->port_initialized = 0; + + efx->link_up = 0; + efx_link_status_changed(efx); +} + +static void efx_remove_port(struct efx_nic *efx) +{ + EFX_LOG(efx, "destroying port\n"); + + falcon_remove_port(efx); +} + +/************************************************************************** + * + * NIC handling + * + **************************************************************************/ + +/* This configures the PCI device to enable I/O and DMA. */ +static int efx_init_io(struct efx_nic *efx) +{ + struct pci_dev *pci_dev = efx->pci_dev; + dma_addr_t dma_mask = efx->type->max_dma_mask; + int rc; + + EFX_LOG(efx, "initialising I/O\n"); + + rc = pci_enable_device(pci_dev); + if (rc) { + EFX_ERR(efx, "failed to enable PCI device\n"); + goto fail1; + } + + pci_set_master(pci_dev); + + /* Set the PCI DMA mask. Try all possibilities from our + * genuine mask down to 32 bits, because some architectures + * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit + * masks event though they reject 46 bit masks. + */ + while (dma_mask > 0x7fffffffUL) { + if (pci_dma_supported(pci_dev, dma_mask) && + ((rc = pci_set_dma_mask(pci_dev, dma_mask)) == 0)) + break; + dma_mask >>= 1; + } + if (rc) { + EFX_ERR(efx, "could not find a suitable DMA mask\n"); + goto fail2; + } + EFX_LOG(efx, "using DMA mask %llx\n", (unsigned long long) dma_mask); + rc = pci_set_consistent_dma_mask(pci_dev, dma_mask); + if (rc) { + /* pci_set_consistent_dma_mask() is not *allowed* to + * fail with a mask that pci_set_dma_mask() accepted, + * but just in case... + */ + EFX_ERR(efx, "failed to set consistent DMA mask\n"); + goto fail2; + } + + efx->membase_phys = pci_resource_start(efx->pci_dev, + efx->type->mem_bar); + rc = pci_request_region(pci_dev, efx->type->mem_bar, "sfc"); + if (rc) { + EFX_ERR(efx, "request for memory BAR failed\n"); + rc = -EIO; + goto fail3; + } + efx->membase = ioremap_nocache(efx->membase_phys, + efx->type->mem_map_size); + if (!efx->membase) { + EFX_ERR(efx, "could not map memory BAR %d at %lx+%x\n", + efx->type->mem_bar, efx->membase_phys, + efx->type->mem_map_size); + rc = -ENOMEM; + goto fail4; + } + EFX_LOG(efx, "memory BAR %u at %lx+%x (virtual %p)\n", + efx->type->mem_bar, efx->membase_phys, efx->type->mem_map_size, + efx->membase); + + return 0; + + fail4: + release_mem_region(efx->membase_phys, efx->type->mem_map_size); + fail3: + efx->membase_phys = 0UL; + fail2: + pci_disable_device(efx->pci_dev); + fail1: + return rc; +} + +static void efx_fini_io(struct efx_nic *efx) +{ + EFX_LOG(efx, "shutting down I/O\n"); + + if (efx->membase) { + iounmap(efx->membase); + efx->membase = NULL; + } + + if (efx->membase_phys) { + pci_release_region(efx->pci_dev, efx->type->mem_bar); + efx->membase_phys = 0UL; + } + + pci_disable_device(efx->pci_dev); +} + +/* Probe the number and type of interrupts we are able to obtain. */ +static void efx_probe_interrupts(struct efx_nic *efx) +{ + int max_channel = efx->type->phys_addr_channels - 1; + struct msix_entry xentries[EFX_MAX_CHANNELS]; + int rc, i; + + if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { + BUG_ON(!pci_find_capability(efx->pci_dev, PCI_CAP_ID_MSIX)); + + efx->rss_queues = rss_cpus ? rss_cpus : num_online_cpus(); + efx->rss_queues = min(efx->rss_queues, max_channel + 1); + efx->rss_queues = min(efx->rss_queues, EFX_MAX_CHANNELS); + + /* Request maximum number of MSI interrupts, and fill out + * the channel interrupt information the allowed allocation */ + for (i = 0; i < efx->rss_queues; i++) + xentries[i].entry = i; + rc = pci_enable_msix(efx->pci_dev, xentries, efx->rss_queues); + if (rc > 0) { + EFX_BUG_ON_PARANOID(rc >= efx->rss_queues); + efx->rss_queues = rc; + rc = pci_enable_msix(efx->pci_dev, xentries, + efx->rss_queues); + } + + if (rc == 0) { + for (i = 0; i < efx->rss_queues; i++) { + efx->channel[i].has_interrupt = 1; + efx->channel[i].irq = xentries[i].vector; + } + } else { + /* Fall back to single channel MSI */ + efx->interrupt_mode = EFX_INT_MODE_MSI; + EFX_ERR(efx, "could not enable MSI-X\n"); + } + } + + /* Try single interrupt MSI */ + if (efx->interrupt_mode == EFX_INT_MODE_MSI) { + efx->rss_queues = 1; + rc = pci_enable_msi(efx->pci_dev); + if (rc == 0) { + efx->channel[0].irq = efx->pci_dev->irq; + efx->channel[0].has_interrupt = 1; + } else { + EFX_ERR(efx, "could not enable MSI\n"); + efx->interrupt_mode = EFX_INT_MODE_LEGACY; + } + } + + /* Assume legacy interrupts */ + if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { + efx->rss_queues = 1; + /* Every channel is interruptible */ + for (i = 0; i < EFX_MAX_CHANNELS; i++) + efx->channel[i].has_interrupt = 1; + efx->legacy_irq = efx->pci_dev->irq; + } +} + +static void efx_remove_interrupts(struct efx_nic *efx) +{ + struct efx_channel *channel; + + /* Remove MSI/MSI-X interrupts */ + efx_for_each_channel_with_interrupt(channel, efx) + channel->irq = 0; + pci_disable_msi(efx->pci_dev); + pci_disable_msix(efx->pci_dev); + + /* Remove legacy interrupt */ + efx->legacy_irq = 0; +} + +/* Select number of used resources + * Should be called after probe_interrupts() + */ +static void efx_select_used(struct efx_nic *efx) +{ + struct efx_tx_queue *tx_queue; + struct efx_rx_queue *rx_queue; + int i; + + /* TX queues. One per port per channel with TX capability + * (more than one per port won't work on Linux, due to out + * of order issues... but will be fine on Solaris) + */ + tx_queue = &efx->tx_queue[0]; + + /* Perform this for each channel with TX capabilities. + * At the moment, we only support a single TX queue + */ + tx_queue->used = 1; + if ((!EFX_INT_MODE_USE_MSI(efx)) && separate_tx_and_rx_channels) + tx_queue->channel = &efx->channel[1]; + else + tx_queue->channel = &efx->channel[0]; + tx_queue->channel->used_flags |= EFX_USED_BY_TX; + tx_queue++; + + /* RX queues. Each has a dedicated channel. */ + for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { + rx_queue = &efx->rx_queue[i]; + + if (i < efx->rss_queues) { + rx_queue->used = 1; + /* If we allow multiple RX queues per channel + * we need to decide that here + */ + rx_queue->channel = &efx->channel[rx_queue->queue]; + rx_queue->channel->used_flags |= EFX_USED_BY_RX; + rx_queue++; + } + } +} + +static int efx_probe_nic(struct efx_nic *efx) +{ + int rc; + + EFX_LOG(efx, "creating NIC\n"); + + /* Carry out hardware-type specific initialisation */ + rc = falcon_probe_nic(efx); + if (rc) + return rc; + + /* Determine the number of channels and RX queues by trying to hook + * in MSI-X interrupts. */ + efx_probe_interrupts(efx); + + /* Determine number of RX queues and TX queues */ + efx_select_used(efx); + + /* Initialise the interrupt moderation settings */ + efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec); + + return 0; +} + +static void efx_remove_nic(struct efx_nic *efx) +{ + EFX_LOG(efx, "destroying NIC\n"); + + efx_remove_interrupts(efx); + falcon_remove_nic(efx); +} + +/************************************************************************** + * + * NIC startup/shutdown + * + *************************************************************************/ + +static int efx_probe_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + int rc; + + /* Create NIC */ + rc = efx_probe_nic(efx); + if (rc) { + EFX_ERR(efx, "failed to create NIC\n"); + goto fail1; + } + + /* Create port */ + rc = efx_probe_port(efx); + if (rc) { + EFX_ERR(efx, "failed to create port\n"); + goto fail2; + } + + /* Create channels */ + efx_for_each_channel(channel, efx) { + rc = efx_probe_channel(channel); + if (rc) { + EFX_ERR(efx, "failed to create channel %d\n", + channel->channel); + goto fail3; + } + } + + return 0; + + fail3: + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + efx_remove_port(efx); + fail2: + efx_remove_nic(efx); + fail1: + return rc; +} + +/* Called after previous invocation(s) of efx_stop_all, restarts the + * port, kernel transmit queue, NAPI processing and hardware interrupts, + * and ensures that the port is scheduled to be reconfigured. + * This function is safe to call multiple times when the NIC is in any + * state. */ +static void efx_start_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + EFX_ASSERT_RESET_SERIALISED(efx); + + /* Check that it is appropriate to restart the interface. All + * of these flags are safe to read under just the rtnl lock */ + if (efx->port_enabled) + return; + if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) + return; + if (NET_DEV_REGISTERED(efx) && !netif_running(efx->net_dev)) + return; + + /* Mark the port as enabled so port reconfigurations can start, then + * restart the transmit interface early so the watchdog timer stops */ + efx_start_port(efx); + efx_wake_queue(efx); + + efx_for_each_channel(channel, efx) + efx_start_channel(channel); + + falcon_enable_interrupts(efx); + + /* Start hardware monitor if we're in RUNNING */ + if (efx->state == STATE_RUNNING) + queue_delayed_work(efx->workqueue, &efx->monitor_work, + efx_monitor_interval); +} + +/* Flush all delayed work. Should only be called when no more delayed work + * will be scheduled. This doesn't flush pending online resets (efx_reset), + * since we're holding the rtnl_lock at this point. */ +static void efx_flush_all(struct efx_nic *efx) +{ + struct efx_rx_queue *rx_queue; + + /* Make sure the hardware monitor is stopped */ + cancel_delayed_work_sync(&efx->monitor_work); + + /* Ensure that all RX slow refills are complete. */ + efx_for_each_rx_queue(rx_queue, efx) { + cancel_delayed_work_sync(&rx_queue->work); + } + + /* Stop scheduled port reconfigurations */ + cancel_work_sync(&efx->reconfigure_work); + +} + +/* Quiesce hardware and software without bringing the link down. + * Safe to call multiple times, when the nic and interface is in any + * state. The caller is guaranteed to subsequently be in a position + * to modify any hardware and software state they see fit without + * taking locks. */ +static void efx_stop_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + EFX_ASSERT_RESET_SERIALISED(efx); + + /* port_enabled can be read safely under the rtnl lock */ + if (!efx->port_enabled) + return; + + /* Disable interrupts and wait for ISR to complete */ + falcon_disable_interrupts(efx); + if (efx->legacy_irq) + synchronize_irq(efx->legacy_irq); + efx_for_each_channel_with_interrupt(channel, efx) + if (channel->irq) + synchronize_irq(channel->irq); + + /* Stop all NAPI processing and synchronous rx refills */ + efx_for_each_channel(channel, efx) + efx_stop_channel(channel); + + /* Stop all asynchronous port reconfigurations. Since all + * event processing has already been stopped, there is no + * window to loose phy events */ + efx_stop_port(efx); + + /* Flush reconfigure_work, refill_workqueue, monitor_work */ + efx_flush_all(efx); + + /* Isolate the MAC from the TX and RX engines, so that queue + * flushes will complete in a timely fashion. */ + falcon_deconfigure_mac_wrapper(efx); + falcon_drain_tx_fifo(efx); + + /* Stop the kernel transmit interface late, so the watchdog + * timer isn't ticking over the flush */ + efx_stop_queue(efx); + if (NET_DEV_REGISTERED(efx)) { + netif_tx_lock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); + } +} + +static void efx_remove_all(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_remove_channel(channel); + efx_remove_port(efx); + efx_remove_nic(efx); +} + +/* A convinience function to safely flush all the queues */ +int efx_flush_queues(struct efx_nic *efx) +{ + int rc; + + EFX_ASSERT_RESET_SERIALISED(efx); + + efx_stop_all(efx); + + efx_fini_channels(efx); + rc = efx_init_channels(efx); + if (rc) { + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + return rc; + } + + efx_start_all(efx); + + return 0; +} + +/************************************************************************** + * + * Interrupt moderation + * + **************************************************************************/ + +/* Set interrupt moderation parameters */ +void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs, int rx_usecs) +{ |