diff options
Diffstat (limited to 'drivers/net/ethernet/sfc/selftest.c')
| -rw-r--r-- | drivers/net/ethernet/sfc/selftest.c | 310 |
1 files changed, 169 insertions, 141 deletions
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 52edd24fcde..0fc5baef45b 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -1,7 +1,7 @@ /**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards + * Driver for Solarflare network controllers and boards * Copyright 2005-2006 Fen Systems Ltd. - * Copyright 2006-2010 Solarflare Communications Inc. + * Copyright 2006-2012 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -19,13 +19,22 @@ #include <linux/udp.h> #include <linux/rtnetlink.h> #include <linux/slab.h> -#include <asm/io.h> #include "net_driver.h" #include "efx.h" #include "nic.h" #include "selftest.h" #include "workarounds.h" +/* IRQ latency can be enormous because: + * - All IRQs may be disabled on a CPU for a *long* time by e.g. a + * slow serial console or an old IDE driver doing error recovery + * - The PREEMPT_RT patches mostly deal with this, but also allow a + * tasklet or normal task to be given higher priority than our IRQ + * threads + * Try to avoid blaming the hardware for this. + */ +#define IRQ_TIMEOUT HZ + /* * Loopback test packet structure * @@ -41,7 +50,7 @@ struct efx_loopback_payload { } __packed; /* Loopback test source MAC address */ -static const unsigned char payload_source[ETH_ALEN] = { +static const u8 payload_source[ETH_ALEN] __aligned(2) = { 0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b, }; @@ -50,7 +59,7 @@ static const char payload_msg[] = /* Interrupt mode names */ static const unsigned int efx_interrupt_mode_max = EFX_INT_MODE_MAX; -static const char *efx_interrupt_mode_names[] = { +static const char *const efx_interrupt_mode_names[] = { [EFX_INT_MODE_MSIX] = "MSI-X", [EFX_INT_MODE_MSI] = "MSI", [EFX_INT_MODE_LEGACY] = "legacy", @@ -78,6 +87,9 @@ struct efx_loopback_state { struct efx_loopback_payload payload; }; +/* How long to wait for all the packets to arrive (in ms) */ +#define LOOPBACK_TIMEOUT_MS 1000 + /************************************************************************** * * MII, NVRAM and register tests @@ -108,19 +120,6 @@ static int efx_test_nvram(struct efx_nic *efx, struct efx_self_tests *tests) return rc; } -static int efx_test_chip(struct efx_nic *efx, struct efx_self_tests *tests) -{ - int rc = 0; - - /* Test register access */ - if (efx->type->test_registers) { - rc = efx->type->test_registers(efx); - tests->registers = rc ? -1 : 1; - } - - return rc; -} - /************************************************************************** * * Interrupt and event queue testing @@ -131,87 +130,117 @@ static int efx_test_chip(struct efx_nic *efx, struct efx_self_tests *tests) static int efx_test_interrupts(struct efx_nic *efx, struct efx_self_tests *tests) { + unsigned long timeout, wait; + int cpu; + netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n"); tests->interrupt = -1; - /* Reset interrupt flag */ - efx->last_irq_cpu = -1; - smp_wmb(); - - efx_nic_generate_interrupt(efx); + efx_nic_irq_test_start(efx); + timeout = jiffies + IRQ_TIMEOUT; + wait = 1; /* Wait for arrival of test interrupt. */ netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n"); - schedule_timeout_uninterruptible(HZ / 10); - if (efx->last_irq_cpu >= 0) - goto success; + do { + schedule_timeout_uninterruptible(wait); + cpu = efx_nic_irq_test_irq_cpu(efx); + if (cpu >= 0) + goto success; + wait *= 2; + } while (time_before(jiffies, timeout)); netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n"); return -ETIMEDOUT; success: netif_dbg(efx, drv, efx->net_dev, "%s test interrupt seen on CPU%d\n", - INT_MODE(efx), - efx->last_irq_cpu); + INT_MODE(efx), cpu); tests->interrupt = 1; return 0; } /* Test generation and receipt of interrupting events */ -static int efx_test_eventq_irq(struct efx_channel *channel, +static int efx_test_eventq_irq(struct efx_nic *efx, struct efx_self_tests *tests) { - struct efx_nic *efx = channel->efx; - unsigned int read_ptr, count; + struct efx_channel *channel; + unsigned int read_ptr[EFX_MAX_CHANNELS]; + unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0; + unsigned long timeout, wait; - tests->eventq_dma[channel->channel] = -1; - tests->eventq_int[channel->channel] = -1; - tests->eventq_poll[channel->channel] = -1; + BUILD_BUG_ON(EFX_MAX_CHANNELS > BITS_PER_LONG); - read_ptr = channel->eventq_read_ptr; - channel->efx->last_irq_cpu = -1; - smp_wmb(); + efx_for_each_channel(channel, efx) { + read_ptr[channel->channel] = channel->eventq_read_ptr; + set_bit(channel->channel, &dma_pend); + set_bit(channel->channel, &int_pend); + efx_nic_event_test_start(channel); + } - efx_nic_generate_test_event(channel); + timeout = jiffies + IRQ_TIMEOUT; + wait = 1; - /* Wait for arrival of interrupt */ - count = 0; + /* Wait for arrival of interrupts. NAPI processing may or may + * not complete in time, but we can cope in any case. + */ do { - schedule_timeout_uninterruptible(HZ / 100); - - if (ACCESS_ONCE(channel->eventq_read_ptr) != read_ptr) - goto eventq_ok; - } while (++count < 2); - - netif_err(efx, drv, efx->net_dev, - "channel %d timed out waiting for event queue\n", - channel->channel); + schedule_timeout_uninterruptible(wait); + + efx_for_each_channel(channel, efx) { + napi_disable(&channel->napi_str); + if (channel->eventq_read_ptr != + read_ptr[channel->channel]) { + set_bit(channel->channel, &napi_ran); + clear_bit(channel->channel, &dma_pend); + clear_bit(channel->channel, &int_pend); + } else { + if (efx_nic_event_present(channel)) + clear_bit(channel->channel, &dma_pend); + if (efx_nic_event_test_irq_cpu(channel) >= 0) + clear_bit(channel->channel, &int_pend); + } + napi_enable(&channel->napi_str); + efx_nic_eventq_read_ack(channel); + } - /* See if interrupt arrived */ - if (channel->efx->last_irq_cpu >= 0) { - netif_err(efx, drv, efx->net_dev, - "channel %d saw interrupt on CPU%d " - "during event queue test\n", channel->channel, - raw_smp_processor_id()); - tests->eventq_int[channel->channel] = 1; - } + wait *= 2; + } while ((dma_pend || int_pend) && time_before(jiffies, timeout)); - /* Check to see if event was received even if interrupt wasn't */ - if (efx_nic_event_present(channel)) { - netif_err(efx, drv, efx->net_dev, - "channel %d event was generated, but " - "failed to trigger an interrupt\n", channel->channel); - tests->eventq_dma[channel->channel] = 1; + efx_for_each_channel(channel, efx) { + bool dma_seen = !test_bit(channel->channel, &dma_pend); + bool int_seen = !test_bit(channel->channel, &int_pend); + + tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1; + tests->eventq_int[channel->channel] = int_seen ? 1 : -1; + + if (dma_seen && int_seen) { + netif_dbg(efx, drv, efx->net_dev, + "channel %d event queue passed (with%s NAPI)\n", + channel->channel, + test_bit(channel->channel, &napi_ran) ? + "" : "out"); + } else { + /* Report failure and whether either interrupt or DMA + * worked + */ + netif_err(efx, drv, efx->net_dev, + "channel %d timed out waiting for event queue\n", + channel->channel); + if (int_seen) + netif_err(efx, drv, efx->net_dev, + "channel %d saw interrupt " + "during event queue test\n", + channel->channel); + if (dma_seen) + netif_err(efx, drv, efx->net_dev, + "channel %d event was generated, but " + "failed to trigger an interrupt\n", + channel->channel); + } } - return -ETIMEDOUT; - eventq_ok: - netif_dbg(efx, drv, efx->net_dev, "channel %d event queue passed\n", - channel->channel); - tests->eventq_dma[channel->channel] = 1; - tests->eventq_int[channel->channel] = 1; - tests->eventq_poll[channel->channel] = 1; - return 0; + return (dma_pend || int_pend) ? -ETIMEDOUT : 0; } static int efx_test_phy(struct efx_nic *efx, struct efx_self_tests *tests, @@ -316,7 +345,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, return; err: -#ifdef EFX_ENABLE_DEBUG +#ifdef DEBUG if (atomic_read(&state->rx_bad) == 0) { netif_err(efx, drv, efx->net_dev, "received packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, @@ -337,14 +366,14 @@ static void efx_iterate_state(struct efx_nic *efx) struct efx_loopback_payload *payload = &state->payload; /* Initialise the layerII header */ - memcpy(&payload->header.h_dest, net_dev->dev_addr, ETH_ALEN); - memcpy(&payload->header.h_source, &payload_source, ETH_ALEN); + ether_addr_copy((u8 *)&payload->header.h_dest, net_dev->dev_addr); + ether_addr_copy((u8 *)&payload->header.h_source, payload_source); payload->header.h_proto = htons(ETH_P_IP); /* saddr set later and used as incrementing count */ payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; - payload->ip.check = htons(0xdead); + payload->ip.check = (__force __sum16) htons(0xdead); payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; @@ -395,11 +424,9 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) * interrupt handler. */ smp_wmb(); - if (efx_dev_registered(efx)) - netif_tx_lock_bh(efx->net_dev); + netif_tx_lock_bh(efx->net_dev); rc = efx_enqueue_skb(tx_queue, skb); - if (efx_dev_registered(efx)) - netif_tx_unlock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); if (rc != NETDEV_TX_OK) { netif_err(efx, drv, efx->net_dev, @@ -420,14 +447,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) static int efx_poll_loopback(struct efx_nic *efx) { struct efx_loopback_state *state = efx->loopback_selftest; - struct efx_channel *channel; - /* NAPI polling is not enabled, so process channels - * synchronously */ - efx_for_each_channel(channel, efx) { - if (channel->work_pending) - efx_process_channel_now(channel); - } return atomic_read(&state->rx_good) == state->packet_count; } @@ -440,20 +460,18 @@ static int efx_end_loopback(struct efx_tx_queue *tx_queue, int tx_done = 0, rx_good, rx_bad; int i, rc = 0; - if (efx_dev_registered(efx)) - netif_tx_lock_bh(efx->net_dev); + netif_tx_lock_bh(efx->net_dev); /* Count the number of tx completions, and decrement the refcnt. Any * skbs not already completed will be free'd when the queue is flushed */ - for (i=0; i < state->packet_count; i++) { + for (i = 0; i < state->packet_count; i++) { skb = state->skbs[i]; if (skb && !skb_shared(skb)) ++tx_done; - dev_kfree_skb_any(skb); + dev_kfree_skb(skb); } - if (efx_dev_registered(efx)) - netif_tx_unlock_bh(efx->net_dev); + netif_tx_unlock_bh(efx->net_dev); /* Check TX completion and received packet counts */ rx_good = atomic_read(&state->rx_good); @@ -518,10 +536,10 @@ efx_test_loopback(struct efx_tx_queue *tx_queue, begin_rc = efx_begin_loopback(tx_queue); /* This will normally complete very quickly, but be - * prepared to wait up to 100 ms. */ + * prepared to wait much longer. */ msleep(1); if (!efx_poll_loopback(efx)) { - msleep(100); + msleep(LOOPBACK_TIMEOUT_MS); efx_poll_loopback(efx); } @@ -561,16 +579,12 @@ static int efx_wait_for_link(struct efx_nic *efx) mutex_lock(&efx->mac_lock); efx->type->monitor(efx); mutex_unlock(&efx->mac_lock); - } else { - struct efx_channel *channel = efx_get_channel(efx, 0); - if (channel->work_pending) - efx_process_channel_now(channel); } mutex_lock(&efx->mac_lock); link_up = link_state->up; if (link_up) - link_up = !efx->mac_op->check_fault(efx); + link_up = !efx->type->check_mac_fault(efx); mutex_unlock(&efx->mac_lock); if (link_up) { @@ -589,7 +603,8 @@ static int efx_test_loopbacks(struct efx_nic *efx, struct efx_self_tests *tests, { enum efx_loopback_mode mode; struct efx_loopback_state *state; - struct efx_channel *channel = efx_get_channel(efx, 0); + struct efx_channel *channel = + efx_get_channel(efx, efx->tx_channel_offset); struct efx_tx_queue *tx_queue; int rc = 0; @@ -661,9 +676,9 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, { enum efx_loopback_mode loopback_mode = efx->loopback_mode; int phy_mode = efx->phy_mode; - enum reset_type reset_method = RESET_TYPE_INVISIBLE; - struct efx_channel *channel; - int rc_test = 0, rc_reset = 0, rc; + int rc_test = 0, rc_reset, rc; + + efx_selftest_async_cancel(efx); /* Online (i.e. non-disruptive) testing * This checks interrupt generation, event delivery and PHY presence. */ @@ -680,11 +695,9 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, if (rc && !rc_test) rc_test = rc; - efx_for_each_channel(channel, efx) { - rc = efx_test_eventq_irq(channel, tests); - if (rc && !rc_test) - rc_test = rc; - } + rc = efx_test_eventq_irq(efx, tests); + if (rc && !rc_test) + rc_test = rc; if (rc_test) return rc_test; @@ -698,46 +711,28 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, /* Detach the device so the kernel doesn't transmit during the * loopback test and the watchdog timeout doesn't fire. */ - netif_device_detach(efx->net_dev); + efx_device_detach_sync(efx); + + if (efx->type->test_chip) { + rc_reset = efx->type->test_chip(efx, tests); + if (rc_reset) { + netif_err(efx, hw, efx->net_dev, + "Unable to recover from chip test\n"); + efx_schedule_reset(efx, RESET_TYPE_DISABLE); + return rc_reset; + } - mutex_lock(&efx->mac_lock); - if (efx->loopback_modes) { - /* We need the 312 clock from the PHY to test the XMAC - * registers, so move into XGMII loopback if available */ - if (efx->loopback_modes & (1 << LOOPBACK_XGMII)) - efx->loopback_mode = LOOPBACK_XGMII; - else - efx->loopback_mode = __ffs(efx->loopback_modes); + if ((tests->memory < 0 || tests->registers < 0) && !rc_test) + rc_test = -EIO; } - __efx_reconfigure_port(efx); - mutex_unlock(&efx->mac_lock); - - /* free up all consumers of SRAM (including all the queues) */ - efx_reset_down(efx, reset_method); - - rc = efx_test_chip(efx, tests); - if (rc && !rc_test) - rc_test = rc; - - /* reset the chip to recover from the register test */ - rc_reset = efx->type->reset(efx, reset_method); - /* Ensure that the phy is powered and out of loopback * for the bist and loopback tests */ + mutex_lock(&efx->mac_lock); efx->phy_mode &= ~PHY_MODE_LOW_POWER; efx->loopback_mode = LOOPBACK_NONE; - - rc = efx_reset_up(efx, reset_method, rc_reset == 0); - if (rc && !rc_reset) - rc_reset = rc; - - if (rc_reset) { - netif_err(efx, drv, efx->net_dev, - "Unable to recover from chip test\n"); - efx_schedule_reset(efx, RESET_TYPE_DISABLE); - return rc_reset; - } + __efx_reconfigure_port(efx); + mutex_unlock(&efx->mac_lock); rc = efx_test_phy(efx, tests, flags); if (rc && !rc_test) @@ -759,3 +754,36 @@ int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests, return rc_test; } +void efx_selftest_async_start(struct efx_nic *efx) +{ + struct efx_channel *channel; + + efx_for_each_channel(channel, efx) + efx_nic_event_test_start(channel); + schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT); +} + +void efx_selftest_async_cancel(struct efx_nic *efx) +{ + cancel_delayed_work_sync(&efx->selftest_work); +} + +void efx_selftest_async_work(struct work_struct *data) +{ + struct efx_nic *efx = container_of(data, struct efx_nic, + selftest_work.work); + struct efx_channel *channel; + int cpu; + + efx_for_each_channel(channel, efx) { + cpu = efx_nic_event_test_irq_cpu(channel); + if (cpu < 0) + netif_err(efx, ifup, efx->net_dev, + "channel %d failed to trigger an interrupt\n", + channel->channel); + else + netif_dbg(efx, ifup, efx->net_dev, + "channel %d triggered interrupt on CPU %d\n", + channel->channel, cpu); + } +} |
