diff options
author | David S. Miller <davem@davemloft.net> | 2012-08-24 16:35:43 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-08-24 16:35:43 -0400 |
commit | 255e87657a84e21986e5d9070f3dee4aa8d1d531 (patch) | |
tree | c20e26ce24779333d927fa4b172deb2d9df59ebf /drivers | |
parent | 85c21049fc588e0a51b443fe2bad348d18f4992c (diff) | |
parent | 8f8b3d518999fd1c342310910aa1e49112c86d05 (diff) |
Merge branch 'for-davem' of git://git.kernel.org/pub/scm/linux/kernel/git/bwh/sfc-next
Ben Hutchings says:
====================
1. Change the TX path to stop queues earlier and avoid returning
NETDEV_TX_BUSY.
2. Remove some inefficiencies in soft-TSO.
3. Fix various bugs involving device state transitions and/or reset
scheduling by error handlers.
4. Take advantage of my previous change to operstate initialisation.
5. Miscellaneous cleanup.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 235 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ethtool.c | 4 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/falcon_boards.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 49 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.c | 6 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/tx.c | 621 |
6 files changed, 410 insertions, 507 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 65a8d49106a..a606db43c5b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -202,11 +202,21 @@ static void efx_stop_all(struct efx_nic *efx); #define EFX_ASSERT_RESET_SERIALISED(efx) \ do { \ - if ((efx->state == STATE_RUNNING) || \ + if ((efx->state == STATE_READY) || \ (efx->state == STATE_DISABLED)) \ ASSERT_RTNL(); \ } while (0) +static int efx_check_disabled(struct efx_nic *efx) +{ + if (efx->state == STATE_DISABLED) { + netif_err(efx, drv, efx->net_dev, + "device is disabled due to earlier errors\n"); + return -EIO; + } + return 0; +} + /************************************************************************** * * Event queue processing @@ -630,6 +640,16 @@ static void efx_start_datapath(struct efx_nic *efx) efx->rx_buffer_order = get_order(efx->rx_buffer_len + sizeof(struct efx_rx_page_state)); + /* We must keep at least one descriptor in a TX ring empty. + * We could avoid this when the queue size does not exactly + * match the hardware ring size, but it's not that important. + * Therefore we stop the queue when one more skb might fill + * the ring completely. We wake it when half way back to + * empty. + */ + efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx); + efx->txq_wake_thresh = efx->txq_stop_thresh / 2; + /* Initialise the channels */ efx_for_each_channel(channel, efx) { efx_for_each_channel_tx_queue(tx_queue, channel) @@ -730,7 +750,11 @@ efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries) struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel; u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; - int rc = 0; + int rc; + + rc = efx_check_disabled(efx); + if (rc) + return rc; /* Not all channels should be reallocated. We must avoid * reallocating their buffer table entries. @@ -1365,6 +1389,8 @@ static void efx_start_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + BUG_ON(efx->state == STATE_DISABLED); + if (efx->legacy_irq) efx->legacy_irq_enabled = true; efx_nic_enable_interrupts(efx); @@ -1382,6 +1408,9 @@ static void efx_stop_interrupts(struct efx_nic *efx, bool may_keep_eventq) { struct efx_channel *channel; + if (efx->state == STATE_DISABLED) + return; + efx_mcdi_mode_poll(efx); efx_nic_disable_interrupts(efx); @@ -1533,22 +1562,21 @@ static int efx_probe_all(struct efx_nic *efx) return rc; } -/* Called after previous invocation(s) of efx_stop_all, restarts the port, - * kernel transmit queues and NAPI processing, and ensures that the port is - * scheduled to be reconfigured. This function is safe to call multiple - * times when the NIC is in any state. +/* If the interface is supposed to be running but is not, start + * the hardware and software data path, regular activity for the port + * (MAC statistics, link polling, etc.) and schedule the port to be + * reconfigured. Interrupts must already be enabled. This function + * is safe to call multiple times, so long as the NIC is not disabled. + * Requires the RTNL lock. */ static void efx_start_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); + BUG_ON(efx->state == STATE_DISABLED); /* Check that it is appropriate to restart the interface. All * of these flags are safe to read under just the rtnl lock */ - if (efx->port_enabled) - return; - if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) - return; - if (!netif_running(efx->net_dev)) + if (efx->port_enabled || !netif_running(efx->net_dev)) return; efx_start_port(efx); @@ -1582,11 +1610,11 @@ static void efx_flush_all(struct efx_nic *efx) cancel_work_sync(&efx->mac_work); } -/* Quiesce hardware and software without bringing the link down. - * Safe to call multiple times, when the nic and interface is in any - * state. The caller is guaranteed to subsequently be in a position - * to modify any hardware and software state they see fit without - * taking locks. */ +/* Quiesce the hardware and software data path, and regular activity + * for the port without bringing the link down. Safe to call multiple + * times with the NIC in almost any state, but interrupts should be + * enabled. Requires the RTNL lock. + */ static void efx_stop_all(struct efx_nic *efx) { EFX_ASSERT_RESET_SERIALISED(efx); @@ -1739,8 +1767,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) struct efx_nic *efx = netdev_priv(net_dev); struct mii_ioctl_data *data = if_mii(ifr); - EFX_ASSERT_RESET_SERIALISED(efx); - /* Convert phy_id from older PRTAD/DEVAD format */ if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && (data->phy_id & 0xfc00) == 0x0400) @@ -1820,13 +1846,14 @@ static void efx_netpoll(struct net_device *net_dev) static int efx_net_open(struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); - EFX_ASSERT_RESET_SERIALISED(efx); + int rc; netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", raw_smp_processor_id()); - if (efx->state == STATE_DISABLED) - return -EIO; + rc = efx_check_disabled(efx); + if (rc) + return rc; if (efx->phy_mode & PHY_MODE_SPECIAL) return -EBUSY; if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) @@ -1852,10 +1879,8 @@ static int efx_net_stop(struct net_device *net_dev) netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", raw_smp_processor_id()); - if (efx->state != STATE_DISABLED) { - /* Stop the device and flush all the channels */ - efx_stop_all(efx); - } + /* Stop the device and flush all the channels */ + efx_stop_all(efx); return 0; } @@ -1915,9 +1940,11 @@ static void efx_watchdog(struct net_device *net_dev) static int efx_change_mtu(struct net_device *net_dev, int new_mtu) { struct efx_nic *efx = netdev_priv(net_dev); + int rc; - EFX_ASSERT_RESET_SERIALISED(efx); - + rc = efx_check_disabled(efx); + if (rc) + return rc; if (new_mtu > EFX_MAX_MTU) return -EINVAL; @@ -1926,8 +1953,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); mutex_lock(&efx->mac_lock); - /* Reconfigure the MAC before enabling the dma queues so that - * the RX buffers don't overflow */ net_dev->mtu = new_mtu; efx->type->reconfigure_mac(efx); mutex_unlock(&efx->mac_lock); @@ -1942,8 +1967,6 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data) struct sockaddr *addr = data; char *new_addr = addr->sa_data; - EFX_ASSERT_RESET_SERIALISED(efx); - if (!is_valid_ether_addr(new_addr)) { netif_err(efx, drv, efx->net_dev, "invalid ethernet MAC address requested: %pM\n", @@ -2079,11 +2102,27 @@ static int efx_register_netdev(struct efx_nic *efx) rtnl_lock(); + /* Enable resets to be scheduled and check whether any were + * already requested. If so, the NIC is probably hosed so we + * abort. + */ + efx->state = STATE_READY; + smp_mb(); /* ensure we change state before checking reset_pending */ + if (efx->reset_pending) { + netif_err(efx, probe, efx->net_dev, + "aborting probe due to scheduled reset\n"); + rc = -EIO; + goto fail_locked; + } + rc = dev_alloc_name(net_dev, net_dev->name); if (rc < 0) goto fail_locked; efx_update_name(efx); + /* Always start with carrier off; PHY events will detect the link */ + netif_carrier_off(net_dev); + rc = register_netdevice(net_dev); if (rc) goto fail_locked; @@ -2094,9 +2133,6 @@ static int efx_register_netdev(struct efx_nic *efx) efx_init_tx_queue_core_txq(tx_queue); } - /* Always start with carrier off; PHY events will detect the link */ - netif_carrier_off(net_dev); - rtnl_unlock(); rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); @@ -2108,14 +2144,14 @@ static int efx_register_netdev(struct efx_nic *efx) return 0; +fail_registered: + rtnl_lock(); + unregister_netdevice(net_dev); fail_locked: + efx->state = STATE_UNINIT; rtnl_unlock(); netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); return rc; - -fail_registered: - unregister_netdev(net_dev); - return rc; } static void efx_unregister_netdev(struct efx_nic *efx) @@ -2138,7 +2174,11 @@ static void efx_unregister_netdev(struct efx_nic *efx) strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); - unregister_netdev(efx->net_dev); + + rtnl_lock(); + unregister_netdevice(efx->net_dev); + efx->state = STATE_UNINIT; + rtnl_unlock(); } /************************************************************************** @@ -2154,9 +2194,9 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) EFX_ASSERT_RESET_SERIALISED(efx); efx_stop_all(efx); - mutex_lock(&efx->mac_lock); - efx_stop_interrupts(efx, false); + + mutex_lock(&efx->mac_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) efx->phy_op->fini(efx); efx->type->fini(efx); @@ -2276,16 +2316,15 @@ static void efx_reset_work(struct work_struct *data) if (!pending) return; - /* If we're not RUNNING then don't reset. Leave the reset_pending - * flags set so that efx_pci_probe_main will be retried */ - if (efx->state != STATE_RUNNING) { - netif_info(efx, drv, efx->net_dev, - "scheduled reset quenched. NIC not RUNNING\n"); - return; - } - rtnl_lock(); - (void)efx_reset(efx, fls(pending) - 1); + + /* We checked the state in efx_schedule_reset() but it may + * have changed by now. Now that we have the RTNL lock, + * it cannot change again. + */ + if (efx->state == STATE_READY) + (void)efx_reset(efx, fls(pending) - 1); + rtnl_unlock(); } @@ -2311,6 +2350,13 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) } set_bit(method, &efx->reset_pending); + smp_mb(); /* ensure we change reset_pending before checking state */ + + /* If we're not READY then just leave the flags set as the cue + * to abort probing or reschedule the reset later. + */ + if (ACCESS_ONCE(efx->state) != STATE_READY) + return; /* efx_process_channel() will no longer read events once a * reset is scheduled. So switch back to poll'd MCDI completions. */ @@ -2376,13 +2422,12 @@ static const struct efx_phy_operations efx_dummy_phy_operations = { /* This zeroes out and then fills in the invariants in a struct * efx_nic (including all sub-structures). */ -static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, +static int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev, struct net_device *net_dev) { int i; /* Initialise common structures */ - memset(efx, 0, sizeof(*efx)); spin_lock_init(&efx->biu_lock); #ifdef CONFIG_SFC_MTD INIT_LIST_HEAD(&efx->mtd_list); @@ -2392,7 +2437,7 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work); efx->pci_dev = pci_dev; efx->msg_enable = debug; - efx->state = STATE_INIT; + efx->state = STATE_UNINIT; strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); efx->net_dev = net_dev; @@ -2409,8 +2454,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, goto fail; } - efx->type = type; - EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); /* Higher numbered interrupt modes are less capable! */ @@ -2455,6 +2498,12 @@ static void efx_fini_struct(struct efx_nic *efx) */ static void efx_pci_remove_main(struct efx_nic *efx) { + /* Flush reset_work. It can no longer be scheduled since we + * are not READY. + */ + BUG_ON(efx->state == STATE_READY); + cancel_work_sync(&efx->reset_work); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap); efx->net_dev->rx_cpu_rmap = NULL; @@ -2480,24 +2529,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) /* Mark the NIC as fini, then stop the interface */ rtnl_lock(); - efx->state = STATE_FINI; dev_close(efx->net_dev); - - /* Allow any queued efx_resets() to complete */ + efx_stop_interrupts(efx, false); rtnl_unlock(); - efx_stop_interrupts(efx, false); efx_sriov_fini(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); - /* Wait for any scheduled resets to complete. No more will be - * scheduled from this point because efx_stop_all() has been - * called, we are no longer registered with driverlink, and - * the net_device's have been removed. */ - cancel_work_sync(&efx->reset_work); - efx_pci_remove_main(efx); efx_fini_io(efx); @@ -2617,7 +2657,6 @@ static int efx_pci_probe_main(struct efx_nic *efx) static int __devinit efx_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *entry) { - const struct efx_nic_type *type = (const struct efx_nic_type *) entry->driver_data; struct net_device *net_dev; struct efx_nic *efx; int rc; @@ -2627,10 +2666,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, EFX_MAX_RX_QUEUES); if (!net_dev) return -ENOMEM; - net_dev->features |= (type->offload_features | NETIF_F_SG | + efx = netdev_priv(net_dev); + efx->type = (const struct efx_nic_type *) entry->driver_data; + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_RXCSUM); - if (type->offload_features & NETIF_F_V6_CSUM) + if (efx->type->offload_features & NETIF_F_V6_CSUM) net_dev->features |= NETIF_F_TSO6; /* Mask for features that also apply to VLAN devices */ net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | @@ -2638,10 +2679,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, NETIF_F_RXCSUM); /* All offloads can be toggled */ net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA; - efx = netdev_priv(net_dev); pci_set_drvdata(pci_dev, efx); SET_NETDEV_DEV(net_dev, &pci_dev->dev); - rc = efx_init_struct(efx, type, pci_dev, net_dev); + rc = efx_init_struct(efx, pci_dev, net_dev); if (rc) goto fail1; @@ -2656,28 +2696,9 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, goto fail2; rc = efx_pci_probe_main(efx); - - /* Serialise against efx_reset(). No more resets will be - * scheduled since efx_stop_all() has been called, and we have - * not and never have been registered. - */ - cancel_work_sync(&efx->reset_work); - if (rc) goto fail3; - /* If there was a scheduled reset during probe, the NIC is - * probably hosed anyway. - */ - if (efx->reset_pending) { - rc = -EIO; - goto fail4; - } - - /* Switch to the running state before we expose the device to the OS, - * so that dev_open()|efx_start_all() will actually start the device */ - efx->state = STATE_RUNNING; - rc = efx_register_netdev(efx); if (rc) goto fail4; @@ -2717,12 +2738,18 @@ static int efx_pm_freeze(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_FINI; + rtnl_lock(); - netif_device_detach(efx->net_dev); + if (efx->state != STATE_DISABLED) { + efx->state = STATE_UNINIT; - efx_stop_all(efx); - efx_stop_interrupts(efx, false); + netif_device_detach(efx->net_dev); + + efx_stop_all(efx); + efx_stop_interrupts(efx, false); + } + + rtnl_unlock(); return 0; } @@ -2731,21 +2758,25 @@ static int efx_pm_thaw(struct device *dev) { struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); - efx->state = STATE_INIT; + rtnl_lock(); - efx_start_interrupts(efx, false); + if (efx->state != STATE_DISABLED) { + efx_start_interrupts(efx, false); - mutex_lock(&efx->mac_lock); - efx->phy_op->reconfigure(efx); - mutex_unlock(&efx->mac_lock); + mutex_lock(&efx->mac_lock); + efx->phy_op->reconfigure(efx); + mutex_unlock(&efx->mac_lock); - efx_start_all(efx); + efx_start_all(efx); - netif_device_attach(efx->net_dev); + netif_device_attach(efx->net_dev); - efx->state = STATE_RUNNING; + efx->state = STATE_READY; - efx->type->resume_wol(efx); + efx->type->resume_wol(efx); + } + + rtnl_unlock(); /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ queue_work(reset_workqueue, &efx->reset_work); diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 8cba2df82b1..2bd5c2d35e5 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -529,9 +529,7 @@ static void efx_ethtool_self_test(struct net_device *net_dev, if (!efx_tests) goto fail; - - ASSERT_RTNL(); - if (efx->state != STATE_RUNNING) { + if (efx->state != STATE_READY) { rc = -EIO; goto fail1; } diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 8687a6c3db0..ec1e99d0dca 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -380,7 +380,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev, new_mode = PHY_MODE_SPECIAL; if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) { err = 0; - } else if (efx->state != STATE_RUNNING || netif_running(efx->net_dev)) { + } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) { err = -EBUSY; } else { /* Reset the PHY, reconfigure the MAC and enable/disable diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index cd9c0a98969..7ab1232494e 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -91,29 +91,31 @@ struct efx_special_buffer { }; /** - * struct efx_tx_buffer - An Efx TX buffer - * @skb: The associated socket buffer. - * Set only on the final fragment of a packet; %NULL for all other - * fragments. When this fragment completes, then we can free this - * skb. - * @tsoh: The associated TSO header structure, or %NULL if this - * buffer is not a TSO header. + * struct efx_tx_buffer - buffer state for a TX descriptor + * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be + * freed when descriptor completes + * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be + * freed when descriptor completes. * @dma_addr: DMA address of the fragment. + * @flags: Flags for allocation and DMA mapping type * @len: Length of this fragment. * This field is zero when the queue slot is empty. - * @continuation: True if this fragment is not the end of a packet. - * @unmap_single: True if dma_unmap_single should be used. * @unmap_len: Length of this fragment to unmap */ struct efx_tx_buffer { - const struct sk_buff *skb; - struct efx_tso_header *tsoh; + union { + const struct sk_buff *skb; + void *heap_buf; + }; dma_addr_t dma_addr; + unsigned short flags; unsigned short len; - bool continuation; - bool unmap_single; unsigned short unmap_len; }; +#define EFX_TX_BUF_CONT 1 /* not last descriptor of packet */ +#define EFX_TX_BUF_SKB 2 /* buffer is last part of skb */ +#define EFX_TX_BUF_HEAP 4 /* buffer was allocated with kmalloc() */ +#define EFX_TX_BUF_MAP_SINGLE 8 /* buffer was mapped with dma_map_single() */ /** * struct efx_tx_queue - An Efx TX queue @@ -133,6 +135,7 @@ struct efx_tx_buffer { * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring + * @tsoh_page: Array of pages of TSO header buffers * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. * @initialised: Has hardware queue been initialised? @@ -156,9 +159,6 @@ struct efx_tx_buffer { * variable indicates that the queue is full. This is to * avoid cache-line ping-pong between the xmit path and the * completion path. - * @tso_headers_free: A list of TSO headers allocated for this TX queue - * that are not in use, and so available for new TSO sends. The list - * is protected by the TX queue lock. * @tso_bursts: Number of times TSO xmit invoked by kernel * @tso_long_headers: Number of packets with headers too long for standard * blocks @@ -175,6 +175,7 @@ struct efx_tx_queue { struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; + struct efx_buffer *tsoh_page; struct efx_special_buffer txd; unsigned int ptr_mask; bool initialised; @@ -187,7 +188,6 @@ struct efx_tx_queue { unsigned int insert_count ____cacheline_aligned_in_smp; unsigned int write_count; unsigned int old_read_count; - struct efx_tso_header *tso_headers_free; unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; @@ -430,11 +430,9 @@ enum efx_int_mode { #define EFX_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EFX_INT_MODE_MSI) enum nic_state { - STATE_INIT = 0, - STATE_RUNNING = 1, - STATE_FINI = 2, - STATE_DISABLED = 3, - STATE_MAX, + STATE_UNINIT = 0, /* device being probed/removed or is frozen */ + STATE_READY = 1, /* hardware ready and netdev registered */ + STATE_DISABLED = 2, /* device disabled due to hardware errors */ }; /* @@ -654,7 +652,7 @@ struct vfdi_status; * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags - * @state: Device state flag. Serialised by the rtnl_lock. + * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets * @tx_queue: TX DMA queues * @rx_queue: RX DMA queues @@ -664,6 +662,8 @@ struct vfdi_status; * should be allocated for this NIC * @rxq_entries: Size of receive queues requested by user. * @txq_entries: Size of transmit queues requested by user. + * @txq_stop_thresh: TX queue fill level at or above which we stop it. + * @txq_wake_thresh: TX queue fill level at or below which we wake it. * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches * @sram_lim_qw: Qword address limit of SRAM @@ -774,6 +774,9 @@ struct efx_nic { unsigned rxq_entries; unsigned txq_entries; + unsigned int txq_stop_thresh; + unsigned int txq_wake_thresh; + unsigned tx_dc_base; unsigned rx_dc_base; unsigned sram_lim_qw; diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 326d799762d..cdff40b6572 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -298,7 +298,7 @@ efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) /************************************************************************** * * Generic buffer handling - * These buffers are used for interrupt status and MAC stats + * These buffers are used for interrupt status, MAC stats, etc. * **************************************************************************/ @@ -401,8 +401,10 @@ void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) ++tx_queue->write_count; /* Create TX descriptor ring entry */ + BUILD_BUG_ON(EFX_TX_BUF_CONT != 1); EFX_POPULATE_QWORD_4(*txd, - FSF_AZ_TX_KER_CONT, buffer->continuation, + FSF_AZ_TX_KER_CONT, + buffer->flags & EFX_TX_BUF_CONT, FSF_AZ_TX_KER_BYTE_COUNT, buffer->len, FSF_AZ_TX_KER_BUF_REGION, 0, FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr); diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 18713436b44..ebca75ed78d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -22,14 +22,6 @@ #include "nic.h" #include "workarounds.h" -/* - * TX descriptor ring full threshold - * - * The tx_queue descriptor ring fill-level must fall below this value - * before we restart the netif queue - */ -#define EFX_TXQ_THRESHOLD(_efx) ((_efx)->txq_entries / 2u) - static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer, unsigned int *pkts_compl, @@ -39,67 +31,32 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, struct device *dma_dev = &tx_queue->efx->pci_dev->dev; dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len - buffer->unmap_len); - if (buffer->unmap_single) + if (buffer->flags & EFX_TX_BUF_MAP_SINGLE) dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); else dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len, DMA_TO_DEVICE); buffer->unmap_len = 0; - buffer->unmap_single = false; } - if (buffer->skb) { + if (buffer->flags & EFX_TX_BUF_SKB) { (*pkts_compl)++; (*bytes_compl) += buffer->skb->len; dev_kfree_skb_any((struct sk_buff *) buffer->skb); - buffer->skb = NULL; netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev, "TX queue %d transmission id %x complete\n", tx_queue->queue, tx_queue->read_count); + } else if (buffer->flags & EFX_TX_BUF_HEAP) { + kfree(buffer->heap_buf); } -} -/** - * struct efx_tso_header - a DMA mapped buffer for packet headers - * @next: Linked list of free ones. - * The list is protected by the TX queue lock. - * @dma_unmap_len: Length to unmap for an oversize buffer, or 0. - * @dma_addr: The DMA address of the header below. - * - * This controls the memory used for a TSO header. Use TSOH_DATA() - * to find the packet header data. Use TSOH_SIZE() to calculate the - * total size required for a given packet header length. TSO headers - * in the free list are exactly %TSOH_STD_SIZE bytes in size. - */ -struct efx_tso_header { - union { - struct efx_tso_header *next; - size_t unmap_len; - }; - dma_addr_t dma_addr; -}; + buffer->len = 0; + buffer->flags = 0; +} static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb); -static void efx_fini_tso(struct efx_tx_queue *tx_queue); -static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, - struct efx_tso_header *tsoh); - -static void efx_tsoh_free(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer) -{ - if (buffer->tsoh) { - if (likely(!buffer->tsoh->unmap_len)) { - buffer->tsoh->next = tx_queue->tso_headers_free; - tx_queue->tso_headers_free = buffer->tsoh; - } else { - efx_tsoh_heap_free(tx_queue, buffer->tsoh); - } - buffer->tsoh = NULL; - } -} - static inline unsigned efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) @@ -138,6 +95,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) return max_descs; } +/* Get partner of a TX queue, seen as part of the same net core queue */ +static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue) +{ + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + return tx_queue - EFX_TXQ_TYPE_OFFLOAD; + else + return tx_queue + EFX_TXQ_TYPE_OFFLOAD; +} + +static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) +{ + /* We need to consider both queues that the net core sees as one */ + struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1); + struct efx_nic *efx = txq1->efx; + unsigned int fill_level; + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + if (likely(fill_level < efx->txq_stop_thresh)) + return; + + /* We used the stale old_read_count above, which gives us a + * pessimistic estimate of the fill level (which may even + * validly be >= efx->txq_entries). Now try again using + * read_count (more likely to be a cache miss). + * + * If we read read_count and then conditionally stop the + * queue, it is possible for the completion path to race with + * us and complete all outstanding descriptors in the middle, + * after which there will be no more completions to wake it. + * Therefore we stop the queue first, then read read_count + * (with a memory barrier to ensure the ordering), then + * restart the queue if the fill level turns out to be low + * enough. + */ + netif_tx_stop_queue(txq1->core_txq); + smp_mb(); + txq1->old_read_count = ACCESS_ONCE(txq1->read_count); + txq2->old_read_count = ACCESS_ONCE(txq2->read_count); + + fill_level = max(txq1->insert_count - txq1->old_read_count, + txq2->insert_count - txq2->old_read_count); + EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries); + if (likely(fill_level < efx->txq_stop_thresh)) { + smp_mb(); + if (likely(!efx->loopback_selftest)) + netif_tx_start_queue(txq1->core_txq); + } +} + /* * Add a socket buffer to a TX queue * @@ -151,7 +158,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) * This function is split out from efx_hard_start_xmit to allow the * loopback test to direct packets via specific TX queues. * - * Returns NETDEV_TX_OK or NETDEV_TX_BUSY + * Returns NETDEV_TX_OK. * You must hold netif_tx_lock() to call this function. */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) @@ -160,12 +167,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) struct device *dma_dev = &efx->pci_dev->dev; struct efx_tx_buffer *buffer; skb_frag_t *fragment; - unsigned int len, unmap_len = 0, fill_level, insert_ptr; + unsigned int len, unmap_len = 0, insert_ptr; dma_addr_t dma_addr, unmap_addr = 0; unsigned int dma_len; - bool unmap_single; - int q_space, i = 0; - netdev_tx_t rc = NETDEV_TX_OK; + unsigned short dma_flags; + int i = 0; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -183,14 +189,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) return NETDEV_TX_OK; } - fill_level = tx_queue->insert_count - tx_queue->old_read_count; - q_space = efx->txq_entries - 1 - fill_level; - /* Map for DMA. Use dma_map_single rather than dma_map_page * since this is more efficient on machines with sparse * memory. */ - unmap_single = true; + dma_flags = EFX_TX_BUF_MAP_SINGLE; dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); /* Process all fragments */ @@ -205,39 +208,10 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Add to TX queue, splitting across DMA boundaries */ do { - if (unlikely(q_space-- <= 0)) { - /* It might be that completions have - * happened since the xmit path last - * checked. Update the xmit path's - * copy of read_count. - */ - netif_tx_stop_queue(tx_queue->core_txq); - /* This memory barrier protects the - * change of queue state from the access - * of read_count. */ - smp_mb(); - tx_queue->old_read_count = - ACCESS_ONCE(tx_queue->read_count); - fill_level = (tx_queue->insert_count - - tx_queue->old_read_count); - q_space = efx->txq_entries - 1 - fill_level; - if (unlikely(q_space-- <= 0)) { - rc = NETDEV_TX_BUSY; - goto unwind; - } - smp_mb(); - if (likely(!efx->loopback_selftest)) - netif_tx_start_queue( - tx_queue->core_txq); - } - insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask; buffer = &tx_queue->buffer[insert_ptr]; - efx_tsoh_free(tx_queue, buffer); - EFX_BUG_ON_PARANOID(buffer->tsoh); - EFX_BUG_ON_PARANOID(buffer->skb); + EFX_BUG_ON_PARANOID(buffer->flags); EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(!buffer->continuation); EFX_BUG_ON_PARANOID(buffer->unmap_len); dma_len = efx_max_tx_len(efx, dma_addr); @@ -247,13 +221,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Fill out per descriptor fields */ buffer->len = dma_len; buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; len -= dma_len; dma_addr += dma_len; ++tx_queue->insert_count; } while (len); /* Transfer ownership of the unmapping to the final buffer */ - buffer->unmap_single = unmap_single; + buffer->flags = EFX_TX_BUF_CONT | dma_flags; buffer->unmap_len = unmap_len; unmap_len = 0; @@ -264,20 +239,22 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) len = skb_frag_size(fragment); i++; /* Map for DMA */ - unmap_single = false; + dma_flags = 0; dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, DMA_TO_DEVICE); } /* Transfer ownership of the skb to the final buffer */ buffer->skb = skb; - buffer->continuation = false; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; netdev_tx_sent_queue(tx_queue->core_txq, skb->len); /* Pass off to hardware */ efx_nic_push_buffers(tx_queue); + efx_tx_maybe_stop_queue(tx_queue); + return NETDEV_TX_OK; dma_err: @@ -289,7 +266,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) /* Mark the packet as transmitted, and free the SKB ourselves */ dev_kfree_skb_any(skb); - unwind: /* Work backwards until we hit the original insert pointer val |