diff options
Diffstat (limited to 'drivers/net/ethernet/sfc/nic.c')
-rw-r--r-- | drivers/net/ethernet/sfc/nic.c | 1969 |
1 files changed, 1969 insertions, 0 deletions
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c new file mode 100644 index 00000000000..bafa23a6874 --- /dev/null +++ b/drivers/net/ethernet/sfc/nic.c @@ -0,0 +1,1969 @@ +/**************************************************************************** + * Driver for Solarflare Solarstorm network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2011 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include "net_driver.h" +#include "bitfield.h" +#include "efx.h" +#include "nic.h" +#include "regs.h" +#include "io.h" +#include "workarounds.h" + +/************************************************************************** + * + * Configurable values + * + ************************************************************************** + */ + +/* This is set to 16 for a good reason. In summary, if larger than + * 16, the descriptor cache holds more than a default socket + * buffer's worth of packets (for UDP we can only have at most one + * socket buffer's worth outstanding). This combined with the fact + * that we only get 1 TX event per descriptor cache means the NIC + * goes idle. + */ +#define TX_DC_ENTRIES 16 +#define TX_DC_ENTRIES_ORDER 1 + +#define RX_DC_ENTRIES 64 +#define RX_DC_ENTRIES_ORDER 3 + +/* If EFX_MAX_INT_ERRORS internal errors occur within + * EFX_INT_ERROR_EXPIRE seconds, we consider the NIC broken and + * disable it. + */ +#define EFX_INT_ERROR_EXPIRE 3600 +#define EFX_MAX_INT_ERRORS 5 + +/* We poll for events every FLUSH_INTERVAL ms, and check FLUSH_POLL_COUNT times + */ +#define EFX_FLUSH_INTERVAL 10 +#define EFX_FLUSH_POLL_COUNT 100 + +/* Size and alignment of special buffers (4KB) */ +#define EFX_BUF_SIZE 4096 + +/* Depth of RX flush request fifo */ +#define EFX_RX_FLUSH_COUNT 4 + +/* Generated event code for efx_generate_test_event() */ +#define EFX_CHANNEL_MAGIC_TEST(_channel) \ + (0x00010100 + (_channel)->channel) + +/* Generated event code for efx_generate_fill_event() */ +#define EFX_CHANNEL_MAGIC_FILL(_channel) \ + (0x00010200 + (_channel)->channel) + +/************************************************************************** + * + * Solarstorm hardware access + * + **************************************************************************/ + +static inline void efx_write_buf_tbl(struct efx_nic *efx, efx_qword_t *value, + unsigned int index) +{ + efx_sram_writeq(efx, efx->membase + efx->type->buf_tbl_base, + value, index); +} + +/* Read the current event from the event queue */ +static inline efx_qword_t *efx_event(struct efx_channel *channel, + unsigned int index) +{ + return ((efx_qword_t *) (channel->eventq.addr)) + + (index & channel->eventq_mask); +} + +/* See if an event is present + * + * We check both the high and low dword of the event for all ones. We + * wrote all ones when we cleared the event, and no valid event can + * have all ones in either its high or low dwords. This approach is + * robust against reordering. + * + * Note that using a single 64-bit comparison is incorrect; even + * though the CPU read will be atomic, the DMA write may not be. + */ +static inline int efx_event_present(efx_qword_t *event) +{ + return !(EFX_DWORD_IS_ALL_ONES(event->dword[0]) | + EFX_DWORD_IS_ALL_ONES(event->dword[1])); +} + +static bool efx_masked_compare_oword(const efx_oword_t *a, const efx_oword_t *b, + const efx_oword_t *mask) +{ + return ((a->u64[0] ^ b->u64[0]) & mask->u64[0]) || + ((a->u64[1] ^ b->u64[1]) & mask->u64[1]); +} + +int efx_nic_test_registers(struct efx_nic *efx, + const struct efx_nic_register_test *regs, + size_t n_regs) +{ + unsigned address = 0, i, j; + efx_oword_t mask, imask, original, reg, buf; + + /* Falcon should be in loopback to isolate the XMAC from the PHY */ + WARN_ON(!LOOPBACK_INTERNAL(efx)); + + for (i = 0; i < n_regs; ++i) { + address = regs[i].address; + mask = imask = regs[i].mask; + EFX_INVERT_OWORD(imask); + + efx_reado(efx, &original, address); + + /* bit sweep on and off */ + for (j = 0; j < 128; j++) { + if (!EFX_EXTRACT_OWORD32(mask, j, j)) + continue; + + /* Test this testable bit can be set in isolation */ + EFX_AND_OWORD(reg, original, mask); + EFX_SET_OWORD32(reg, j, j, 1); + + efx_writeo(efx, ®, address); + efx_reado(efx, &buf, address); + + if (efx_masked_compare_oword(®, &buf, &mask)) + goto fail; + + /* Test this testable bit can be cleared in isolation */ + EFX_OR_OWORD(reg, original, mask); + EFX_SET_OWORD32(reg, j, j, 0); + + efx_writeo(efx, ®, address); + efx_reado(efx, &buf, address); + + if (efx_masked_compare_oword(®, &buf, &mask)) + goto fail; + } + + efx_writeo(efx, &original, address); + } + + return 0; + +fail: + netif_err(efx, hw, efx->net_dev, + "wrote "EFX_OWORD_FMT" read "EFX_OWORD_FMT + " at address 0x%x mask "EFX_OWORD_FMT"\n", EFX_OWORD_VAL(reg), + EFX_OWORD_VAL(buf), address, EFX_OWORD_VAL(mask)); + return -EIO; +} + +/************************************************************************** + * + * Special buffer handling + * Special buffers are used for event queues and the TX and RX + * descriptor rings. + * + *************************************************************************/ + +/* + * Initialise a special buffer + * + * This will define a buffer (previously allocated via + * efx_alloc_special_buffer()) in the buffer table, allowing + * it to be used for event queues, descriptor rings etc. + */ +static void +efx_init_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) +{ + efx_qword_t buf_desc; + int index; + dma_addr_t dma_addr; + int i; + + EFX_BUG_ON_PARANOID(!buffer->addr); + + /* Write buffer descriptors to NIC */ + for (i = 0; i < buffer->entries; i++) { + index = buffer->index + i; + dma_addr = buffer->dma_addr + (i * 4096); + netif_dbg(efx, probe, efx->net_dev, + "mapping special buffer %d at %llx\n", + index, (unsigned long long)dma_addr); + EFX_POPULATE_QWORD_3(buf_desc, + FRF_AZ_BUF_ADR_REGION, 0, + FRF_AZ_BUF_ADR_FBUF, dma_addr >> 12, + FRF_AZ_BUF_OWNER_ID_FBUF, 0); + efx_write_buf_tbl(efx, &buf_desc, index); + } +} + +/* Unmaps a buffer and clears the buffer table entries */ +static void +efx_fini_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) +{ + efx_oword_t buf_tbl_upd; + unsigned int start = buffer->index; + unsigned int end = (buffer->index + buffer->entries - 1); + + if (!buffer->entries) + return; + + netif_dbg(efx, hw, efx->net_dev, "unmapping special buffers %d-%d\n", + buffer->index, buffer->index + buffer->entries - 1); + + EFX_POPULATE_OWORD_4(buf_tbl_upd, + FRF_AZ_BUF_UPD_CMD, 0, + FRF_AZ_BUF_CLR_CMD, 1, + FRF_AZ_BUF_CLR_END_ID, end, + FRF_AZ_BUF_CLR_START_ID, start); + efx_writeo(efx, &buf_tbl_upd, FR_AZ_BUF_TBL_UPD); +} + +/* + * Allocate a new special buffer + * + * This allocates memory for a new buffer, clears it and allocates a + * new buffer ID range. It does not write into the buffer table. + * + * This call will allocate 4KB buffers, since 8KB buffers can't be + * used for event queues and descriptor rings. + */ +static int efx_alloc_special_buffer(struct efx_nic *efx, + struct efx_special_buffer *buffer, + unsigned int len) +{ + len = ALIGN(len, EFX_BUF_SIZE); + + buffer->addr = dma_alloc_coherent(&efx->pci_dev->dev, len, + &buffer->dma_addr, GFP_KERNEL); + if (!buffer->addr) + return -ENOMEM; + buffer->len = len; + buffer->entries = len / EFX_BUF_SIZE; + BUG_ON(buffer->dma_addr & (EFX_BUF_SIZE - 1)); + + /* All zeros is a potentially valid event so memset to 0xff */ + memset(buffer->addr, 0xff, len); + + /* Select new buffer ID */ + buffer->index = efx->next_buffer_table; + efx->next_buffer_table += buffer->entries; + + netif_dbg(efx, probe, efx->net_dev, + "allocating special buffers %d-%d at %llx+%x " + "(virt %p phys %llx)\n", buffer->index, + buffer->index + buffer->entries - 1, + (u64)buffer->dma_addr, len, + buffer->addr, (u64)virt_to_phys(buffer->addr)); + + return 0; +} + +static void +efx_free_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer) +{ + if (!buffer->addr) + return; + + netif_dbg(efx, hw, efx->net_dev, + "deallocating special buffers %d-%d at %llx+%x " + "(virt %p phys %llx)\n", buffer->index, + buffer->index + buffer->entries - 1, + (u64)buffer->dma_addr, buffer->len, + buffer->addr, (u64)virt_to_phys(buffer->addr)); + + dma_free_coherent(&efx->pci_dev->dev, buffer->len, buffer->addr, + buffer->dma_addr); + buffer->addr = NULL; + buffer->entries = 0; +} + +/************************************************************************** + * + * Generic buffer handling + * These buffers are used for interrupt status and MAC stats + * + **************************************************************************/ + +int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer, + unsigned int len) +{ + buffer->addr = pci_alloc_consistent(efx->pci_dev, len, + &buffer->dma_addr); + if (!buffer->addr) + return -ENOMEM; + buffer->len = len; + memset(buffer->addr, 0, len); + return 0; +} + +void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer) +{ + if (buffer->addr) { + pci_free_consistent(efx->pci_dev, buffer->len, + buffer->addr, buffer->dma_addr); + buffer->addr = NULL; + } +} + +/************************************************************************** + * + * TX path + * + **************************************************************************/ + +/* Returns a pointer to the specified transmit descriptor in the TX + * descriptor queue belonging to the specified channel. + */ +static inline efx_qword_t * +efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index) +{ + return ((efx_qword_t *) (tx_queue->txd.addr)) + index; +} + +/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */ +static inline void efx_notify_tx_desc(struct efx_tx_queue *tx_queue) +{ + unsigned write_ptr; + efx_dword_t reg; + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_DWORD_1(reg, FRF_AZ_TX_DESC_WPTR_DWORD, write_ptr); + efx_writed_page(tx_queue->efx, ®, + FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue); +} + +/* Write pointer and first descriptor for TX descriptor ring */ +static inline void efx_push_tx_desc(struct efx_tx_queue *tx_queue, + const efx_qword_t *txd) +{ + unsigned write_ptr; + efx_oword_t reg; + + BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0); + BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0); + + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + EFX_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true, + FRF_AZ_TX_DESC_WPTR, write_ptr); + reg.qword[0] = *txd; + efx_writeo_page(tx_queue->efx, ®, + FR_BZ_TX_DESC_UPD_P0, tx_queue->queue); +} + +static inline bool +efx_may_push_tx_desc(struct efx_tx_queue *tx_queue, unsigned int write_count) +{ + unsigned empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count); + + if (empty_read_count == 0) + return false; + + tx_queue->empty_read_count = 0; + return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0; +} + +/* For each entry inserted into the software descriptor ring, create a + * descriptor in the hardware TX descriptor ring (in host memory), and + * write a doorbell. + */ +void efx_nic_push_buffers(struct efx_tx_queue *tx_queue) +{ + + struct efx_tx_buffer *buffer; + efx_qword_t *txd; + unsigned write_ptr; + unsigned old_write_count = tx_queue->write_count; + + BUG_ON(tx_queue->write_count == tx_queue->insert_count); + + do { + write_ptr = tx_queue->write_count & tx_queue->ptr_mask; + buffer = &tx_queue->buffer[write_ptr]; + txd = efx_tx_desc(tx_queue, write_ptr); + ++tx_queue->write_count; + + /* Create TX descriptor ring entry */ + EFX_POPULATE_QWORD_4(*txd, + FSF_AZ_TX_KER_CONT, buffer->continuation, + FSF_AZ_TX_KER_BYTE_COUNT, buffer->len, + FSF_AZ_TX_KER_BUF_REGION, 0, + FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr); + } while (tx_queue->write_count != tx_queue->insert_count); + + wmb(); /* Ensure descriptors are written before they are fetched */ + + if (efx_may_push_tx_desc(tx_queue, old_write_count)) { + txd = efx_tx_desc(tx_queue, + old_write_count & tx_queue->ptr_mask); + efx_push_tx_desc(tx_queue, txd); + ++tx_queue->pushes; + } else { + efx_notify_tx_desc(tx_queue); + } +} + +/* Allocate hardware resources for a TX queue */ +int efx_nic_probe_tx(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + unsigned entries; + + entries = tx_queue->ptr_mask + 1; + return efx_alloc_special_buffer(efx, &tx_queue->txd, + entries * sizeof(efx_qword_t)); +} + +void efx_nic_init_tx(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + efx_oword_t reg; + + tx_queue->flushed = FLUSH_NONE; + + /* Pin TX descriptor ring */ + efx_init_special_buffer(efx, &tx_queue->txd); + + /* Push TX descriptor ring to card */ + EFX_POPULATE_OWORD_10(reg, + FRF_AZ_TX_DESCQ_EN, 1, + FRF_AZ_TX_ISCSI_DDIG_EN, 0, + FRF_AZ_TX_ISCSI_HDIG_EN, 0, + FRF_AZ_TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index, + FRF_AZ_TX_DESCQ_EVQ_ID, + tx_queue->channel->channel, + FRF_AZ_TX_DESCQ_OWNER_ID, 0, + FRF_AZ_TX_DESCQ_LABEL, tx_queue->queue, + FRF_AZ_TX_DESCQ_SIZE, + __ffs(tx_queue->txd.entries), + FRF_AZ_TX_DESCQ_TYPE, 0, + FRF_BZ_TX_NON_IP_DROP_DIS, 1); + + if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { + int csum = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD; + EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum); + EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, + !csum); + } + + efx_writeo_table(efx, ®, efx->type->txd_ptr_tbl_base, + tx_queue->queue); + + if (efx_nic_rev(efx) < EFX_REV_FALCON_B0) { + /* Only 128 bits in this register */ + BUILD_BUG_ON(EFX_MAX_TX_QUEUES > 128); + + efx_reado(efx, ®, FR_AA_TX_CHKSM_CFG); + if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) + clear_bit_le(tx_queue->queue, (void *)®); + else + set_bit_le(tx_queue->queue, (void *)®); + efx_writeo(efx, ®, FR_AA_TX_CHKSM_CFG); + } + + if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) { + EFX_POPULATE_OWORD_1(reg, + FRF_BZ_TX_PACE, + (tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ? + FFE_BZ_TX_PACE_OFF : + FFE_BZ_TX_PACE_RESERVED); + efx_writeo_table(efx, ®, FR_BZ_TX_PACE_TBL, + tx_queue->queue); + } +} + +static void efx_flush_tx_queue(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + efx_oword_t tx_flush_descq; + + tx_queue->flushed = FLUSH_PENDING; + + /* Post a flush command */ + EFX_POPULATE_OWORD_2(tx_flush_descq, + FRF_AZ_TX_FLUSH_DESCQ_CMD, 1, + FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue); + efx_writeo(efx, &tx_flush_descq, FR_AZ_TX_FLUSH_DESCQ); +} + +void efx_nic_fini_tx(struct efx_tx_queue *tx_queue) +{ + struct efx_nic *efx = tx_queue->efx; + efx_oword_t tx_desc_ptr; + + /* The queue should have been flushed */ + WARN_ON(tx_queue->flushed != FLUSH_DONE); + + /* Remove TX descriptor ring from card */ + EFX_ZERO_OWORD(tx_desc_ptr); + efx_writeo_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base, + tx_queue->queue); + + /* Unpin TX descriptor ring */ + efx_fini_special_buffer(efx, &tx_queue->txd); +} + +/* Free buffers backing TX queue */ +void efx_nic_remove_tx(struct efx_tx_queue *tx_queue) +{ + efx_free_special_buffer(tx_queue->efx, &tx_queue->txd); +} + +/************************************************************************** + * + * RX path + * + **************************************************************************/ + +/* Returns a pointer to the specified descriptor in the RX descriptor queue */ +static inline efx_qword_t * +efx_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index) +{ + return ((efx_qword_t *) (rx_queue->rxd.addr)) + index; +} + +/* This creates an entry in the RX descriptor queue */ +static inline void +efx_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned index) +{ + struct efx_rx_buffer *rx_buf; + efx_qword_t *rxd; + + rxd = efx_rx_desc(rx_queue, index); + rx_buf = efx_rx_buffer(rx_queue, index); + EFX_POPULATE_QWORD_3(*rxd, + FSF_AZ_RX_KER_BUF_SIZE, + rx_buf->len - + rx_queue->efx->type->rx_buffer_padding, + FSF_AZ_RX_KER_BUF_REGION, 0, + FSF_AZ_RX_KER_BUF_ADDR, rx_buf->dma_addr); +} + +/* This writes to the RX_DESC_WPTR register for the specified receive + * descriptor ring. + */ +void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + efx_dword_t reg; + unsigned write_ptr; + + while (rx_queue->notified_count != rx_queue->added_count) { + efx_build_rx_desc( + rx_queue, + rx_queue->notified_count & rx_queue->ptr_mask); + ++rx_queue->notified_count; + } + + wmb(); + write_ptr = rx_queue->added_count & rx_queue->ptr_mask; + EFX_POPULATE_DWORD_1(reg, FRF_AZ_RX_DESC_WPTR_DWORD, write_ptr); + efx_writed_page(efx, ®, FR_AZ_RX_DESC_UPD_DWORD_P0, + efx_rx_queue_index(rx_queue)); +} + +int efx_nic_probe_rx(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned entries; + + entries = rx_queue->ptr_mask + 1; + return efx_alloc_special_buffer(efx, &rx_queue->rxd, + entries * sizeof(efx_qword_t)); +} + +void efx_nic_init_rx(struct efx_rx_queue *rx_queue) +{ + efx_oword_t rx_desc_ptr; + struct efx_nic *efx = rx_queue->efx; + bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0; + bool iscsi_digest_en = is_b0; + + netif_dbg(efx, hw, efx->net_dev, + "RX queue %d ring in special buffers %d-%d\n", + efx_rx_queue_index(rx_queue), rx_queue->rxd.index, + rx_queue->rxd.index + rx_queue->rxd.entries - 1); + + rx_queue->flushed = FLUSH_NONE; + + /* Pin RX descriptor ring */ + efx_init_special_buffer(efx, &rx_queue->rxd); + + /* Push RX descriptor ring to card */ + EFX_POPULATE_OWORD_10(rx_desc_ptr, + FRF_AZ_RX_ISCSI_DDIG_EN, iscsi_digest_en, + FRF_AZ_RX_ISCSI_HDIG_EN, iscsi_digest_en, + FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index, + FRF_AZ_RX_DESCQ_EVQ_ID, + efx_rx_queue_channel(rx_queue)->channel, + FRF_AZ_RX_DESCQ_OWNER_ID, 0, + FRF_AZ_RX_DESCQ_LABEL, + efx_rx_queue_index(rx_queue), + FRF_AZ_RX_DESCQ_SIZE, + __ffs(rx_queue->rxd.entries), + FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ , + /* For >=B0 this is scatter so disable */ + FRF_AZ_RX_DESCQ_JUMBO, !is_b0, + FRF_AZ_RX_DESCQ_EN, 1); + efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, + efx_rx_queue_index(rx_queue)); +} + +static void efx_flush_rx_queue(struct efx_rx_queue *rx_queue) +{ + struct efx_nic *efx = rx_queue->efx; + efx_oword_t rx_flush_descq; + + rx_queue->flushed = FLUSH_PENDING; + + /* Post a flush command */ + EFX_POPULATE_OWORD_2(rx_flush_descq, + FRF_AZ_RX_FLUSH_DESCQ_CMD, 1, + FRF_AZ_RX_FLUSH_DESCQ, + efx_rx_queue_index(rx_queue)); + efx_writeo(efx, &rx_flush_descq, FR_AZ_RX_FLUSH_DESCQ); +} + +void efx_nic_fini_rx(struct efx_rx_queue *rx_queue) +{ + efx_oword_t rx_desc_ptr; + struct efx_nic *efx = rx_queue->efx; + + /* The queue should already have been flushed */ + WARN_ON(rx_queue->flushed != FLUSH_DONE); + + /* Remove RX descriptor ring from card */ + EFX_ZERO_OWORD(rx_desc_ptr); + efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base, + efx_rx_queue_index(rx_queue)); + + /* Unpin RX descriptor ring */ + efx_fini_special_buffer(efx, &rx_queue->rxd); +} + +/* Free buffers backing RX queue */ +void efx_nic_remove_rx(struct efx_rx_queue *rx_queue) +{ + efx_free_special_buffer(rx_queue->efx, &rx_queue->rxd); +} + +/************************************************************************** + * + * Event queue processing + * Event queues are processed by per-channel tasklets. + * + **************************************************************************/ + +/* Update a channel's event queue's read pointer (RPTR) register + * + * This writes the EVQ_RPTR_REG register for the specified channel's + * event queue. + */ +void efx_nic_eventq_read_ack(struct efx_channel *channel) +{ + efx_dword_t reg; + struct efx_nic *efx = channel->efx; + + EFX_POPULATE_DWORD_1(reg, FRF_AZ_EVQ_RPTR, + channel->eventq_read_ptr & channel->eventq_mask); + efx_writed_table(efx, ®, efx->type->evq_rptr_tbl_base, + channel->channel); +} + +/* Use HW to insert a SW defined event */ +static void efx_generate_event(struct efx_channel *channel, efx_qword_t *event) +{ + efx_oword_t drv_ev_reg; + + BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_LBN != 0 || + FRF_AZ_DRV_EV_DATA_WIDTH != 64); + drv_ev_reg.u32[0] = event->u32[0]; + drv_ev_reg.u32[1] = event->u32[1]; + drv_ev_reg.u32[2] = 0; + drv_ev_reg.u32[3] = 0; + EFX_SET_OWORD_FIELD(drv_ev_reg, FRF_AZ_DRV_EV_QID, channel->channel); + efx_writeo(channel->efx, &drv_ev_reg, FR_AZ_DRV_EV); +} + +/* Handle a transmit completion event + * + * The NIC batches TX completion events; the message we receive is of + * the form "complete all TX events up to this index". + */ +static int +efx_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) +{ + unsigned int tx_ev_desc_ptr; + unsigned int tx_ev_q_label; + struct efx_tx_queue *tx_queue; + struct efx_nic *efx = channel->efx; + int tx_packets = 0; + + if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) { + /* Transmit completion */ + tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR); + tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); + tx_queue = efx_channel_get_tx_queue( + channel, tx_ev_q_label % EFX_TXQ_TYPES); + tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) & + tx_queue->ptr_mask); + channel->irq_mod_score += tx_packets; + efx_xmit_done(tx_queue, tx_ev_desc_ptr); + } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) { + /* Rewrite the FIFO write pointer */ + tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL); + tx_queue = efx_channel_get_tx_queue( + channel, tx_ev_q_label % EFX_TXQ_TYPES); + + if (efx_dev_registered(efx)) + netif_tx_lock(efx->net_dev); + efx_notify_tx_desc(tx_queue); + if (efx_dev_registered(efx)) + netif_tx_unlock(efx->net_dev); + } else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_PKT_ERR) && + EFX_WORKAROUND_10727(efx)) { + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + } else { + netif_err(efx, tx_err, efx->net_dev, + "channel %d unexpected TX event " + EFX_QWORD_FMT"\n", channel->channel, + EFX_QWORD_VAL(*event)); + } + + return tx_packets; +} + +/* Detect errors included in the rx_evt_pkt_ok bit. */ +static void efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue, + const efx_qword_t *event, + bool *rx_ev_pkt_ok, + bool *discard) +{ + struct efx_channel *channel = efx_rx_queue_channel(rx_queue); + struct efx_nic *efx = rx_queue->efx; + bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err; + bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err; + bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc; + bool rx_ev_other_err, rx_ev_pause_frm; + bool rx_ev_hdr_type, rx_ev_mcast_pkt; + unsigned rx_ev_pkt_type; + + rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); + rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT); + rx_ev_tobe_disc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC); + rx_ev_pkt_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_TYPE); + rx_ev_buf_owner_id_err = EFX_QWORD_FIELD(*event, + FSF_AZ_RX_EV_BUF_OWNER_ID_ERR); + rx_ev_ip_hdr_chksum_err = EFX_QWORD_FIELD(*event, + FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR); + rx_ev_tcp_udp_chksum_err = EFX_QWORD_FIELD(*event, + FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR); + rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR); + rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC); + rx_ev_drib_nib = ((efx_nic_rev(efx) >= EFX_REV_FALCON_B0) ? + 0 : EFX_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB)); + rx_ev_pause_frm = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR); + + /* Every error apart from tobe_disc and pause_frm */ + rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err | + rx_ev_buf_owner_id_err | rx_ev_eth_crc_err | + rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err); + + /* Count errors that are not in MAC stats. Ignore expected + * checksum errors during self-test. */ + if (rx_ev_frm_trunc) + ++channel->n_rx_frm_trunc; + else if (rx_ev_tobe_disc) + ++channel->n_rx_tobe_disc; + else if (!efx->loopback_selftest) { + if (rx_ev_ip_hdr_chksum_err) + ++channel->n_rx_ip_hdr_chksum_err; + else if (rx_ev_tcp_udp_chksum_err) + ++channel->n_rx_tcp_udp_chksum_err; + } + + /* The frame must be discarded if any of these are true. */ + *discard = (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib | + rx_ev_tobe_disc | rx_ev_pause_frm); + + /* TOBE_DISC is expected on unicast mismatches; don't print out an + * error message. FRM_TRUNC indicates RXDP dropped the packet due + * to a FIFO overflow. + */ +#ifdef EFX_ENABLE_DEBUG + if (rx_ev_other_err && net_ratelimit()) { + netif_dbg(efx, rx_err, efx->net_dev, + " RX queue %d unexpected RX event " + EFX_QWORD_FMT "%s%s%s%s%s%s%s%s\n", + efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event), + rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "", + rx_ev_ip_hdr_chksum_err ? + " [IP_HDR_CHKSUM_ERR]" : "", + rx_ev_tcp_udp_chksum_err ? + " [TCP_UDP_CHKSUM_ERR]" : "", + rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "", + rx_ev_frm_trunc ? " [FRM_TRUNC]" : "", + rx_ev_drib_nib ? " [DRIB_NIB]" : "", + rx_ev_tobe_disc ? " [TOBE_DISC]" : "", + rx_ev_pause_frm ? " [PAUSE]" : ""); + } +#endif +} + +/* Handle receive events that are not in-order. */ +static void +efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index) +{ + struct efx_nic *efx = rx_queue->efx; + unsigned expected, dropped; + + expected = rx_queue->removed_count & rx_queue->ptr_mask; + dropped = (index - expected) & rx_queue->ptr_mask; + netif_info(efx, rx_err, efx->net_dev, + "dropped %d events (index=%d expected=%d)\n", + dropped, index, expected); + + efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ? + RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE); +} + +/* Handle a packet received event + * + * The NIC gives a "discard" flag if it's a unicast packet with the + * wrong destination address + * Also "is multicast" and "matches multicast filter" flags can be used to + * discard non-matching multicast packets. + */ +static void +efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event) +{ + unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt; + unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt; + unsigned expected_ptr; + bool rx_ev_pkt_ok, discard = false, checksummed; + struct efx_rx_queue *rx_queue; + + /* Basic packet information */ + rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT); + rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK); + rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE); + WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT)); + WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1); + WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) != + channel->channel); + + rx_queue = efx_channel_get_rx_queue(channel); + + rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR); + expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask; + if (unlikely(rx_ev_desc_ptr != expected_ptr)) + efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr); + + if (likely(rx_ev_pkt_ok)) { + /* If packet is marked as OK and packet type is TCP/IP or + * UDP/IP, then we can rely on the hardware checksum. + */ + checksummed = + rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP || + rx_ev_hdr_type == FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP; + } else { + efx_handle_rx_not_ok(rx_queue, event, &rx_ev_pkt_ok, &discard); + checksummed = false; + } + + /* Detect multicast packets that didn't match the filter */ + rx_ev_mcast_pkt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT); + if (rx_ev_mcast_pkt) { + unsigned int rx_ev_mcast_hash_match = + EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_HASH_MATCH); + + if (unlikely(!rx_ev_mcast_hash_match)) { + ++channel->n_rx_mcast_mismatch; + discard = true; + } + } + + channel->irq_mod_score += 2; + + /* Handle received packet */ + efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, + checksummed, discard); +} + +static void +efx_handle_generated_event(struct efx_channel *channel, efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + unsigned code; + + code = EFX_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC); + if (code == EFX_CHANNEL_MAGIC_TEST(channel)) + ; /* ignore */ + else if (code == EFX_CHANNEL_MAGIC_FILL(channel)) + /* The queue must be empty, so we won't receive any rx + * events, so efx_process_channel() won't refill the + * queue. Refill it here */ + efx_fast_push_rx_descriptors(efx_channel_get_rx_queue(channel)); + else + netif_dbg(efx, hw, efx->net_dev, "channel %d received " + "generated event "EFX_QWORD_FMT"\n", + channel->channel, EFX_QWORD_VAL(*event)); +} + +static void +efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) +{ + struct efx_nic *efx = channel->efx; + unsigned int ev_sub_code; + unsigned int ev_sub_data; + + ev_sub_code = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBCODE); + ev_sub_data = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA); + + switch (ev_sub_code) { + case FSE_AZ_TX_DESCQ_FLS_DONE_EV: + netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", + channel->channel, ev_sub_data); + break; + case FSE_AZ_RX_DESCQ_FLS_DONE_EV: + netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", + channel->channel, ev_sub_data); + break; + case FSE_AZ_EVQ_INIT_DONE_EV: + netif_dbg(efx, hw, efx->net_dev, + "channel %d EVQ %d initialised\n", + channel->channel, ev_sub_data); + break; + case FSE_AZ_SRM_UPD_DONE_EV: + netif_vdbg(efx, hw, efx->net_dev, + "channel %d SRAM update done\n", channel->channel); + break; + case FSE_AZ_WAKE_UP_EV: + netif_vdbg(efx, hw, efx->net_dev, + "channel %d RXQ %d wakeup event\n", + channel->channel, ev_sub_data); + break; + case FSE_AZ_TIMER_EV: + netif_vdbg(efx, hw, efx->net_dev, + "channel %d RX queue %d timer expired\n", + channel->channel, ev_sub_data); + break; + case FSE_AA_RX_RECOVER_EV: + netif_err(efx, rx_err, efx->net_dev, + "channel %d seen DRIVER RX_RESET event. " + "Resetting.\n", channel->channel); + atomic_inc(&efx->rx_reset); + efx_schedule_reset(efx, + EFX_WORKAROUND_6555(efx) ? + RESET_TYPE_RX_RECOVERY : + RESET_TYPE_DISABLE); + break; + case FSE_BZ_RX_DSC_ERROR_EV: + netif_err(efx, rx_err, efx->net_dev, + "RX DMA Q %d reports descriptor fetch error." + " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); + break; + case FSE_BZ_TX_DSC_ERROR_EV: + netif_err(efx, tx_err, efx->net_dev, + "TX DMA Q %d reports descriptor fetch error." + " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + break; + default: + netif_vdbg(efx, hw, efx->net_dev, + "channel %d unknown driver event code %d " + "data %04x\n", channel->channel, ev_sub_code, + ev_sub_data); + break; + } +} + +int efx_nic_process_eventq(struct efx_channel *channel, int budget) +{ + struct efx_nic *efx = channel->efx; + unsigned int read_ptr; + efx_qword_t event, *p_event; + int ev_code; + int tx_packets = 0; + int spent = 0; + + read_ptr = channel->eventq_read_ptr; + + for (;;) { + p_event = efx_event(channel, read_ptr); + event = *p_event; + + if (!efx_event_present(&event)) + /* End of events */ + break; + + netif_vdbg(channel->efx, intr, channel->efx->net_dev, + "channel %d event is "EFX_QWORD_FMT"\n", + channel->channel, EFX_QWORD_VAL(event)); + + /* Clear this event by marking it all ones */ + EFX_SET_QWORD(*p_event); + + ++read_ptr; + + ev_code = EFX_QWORD_FIELD(event, FSF_AZ_EV_CODE); + + switch (ev_code) { + case FSE_AZ_EV_CODE_RX_EV: + efx_handle_rx_event(channel, &event); + if (++spent == budget) + goto out; + break; + case FSE_AZ_EV_CODE_TX_EV: + tx_packets += efx_handle_tx_event(channel, &event); + if (tx_packets > efx->txq_entries) { + spent = budget; + goto out; + } + break; + case FSE_AZ_EV_CODE_DRV_GEN_EV: + efx_handle_generated_event(channel, &event); + break; + case FSE_AZ_EV_CODE_DRIVER_EV: + efx_handle_driver_event(channel, &event); + break; + case FSE_CZ_EV_CODE_MCDI_EV: + efx_mcdi_process_event(channel, &event); + break; + case FSE_AZ_EV_CODE_GLOBAL_EV: + if (efx->type->handle_global_event && + efx->type->handle_global_event(channel, &event)) + break; + /* else fall through */ + default: + netif_err(channel->efx, hw, channel->efx->net_dev, + "channel %d unknown event type %d (data " + EFX_QWORD_FMT ")\n", channel->channel, + ev_code, EFX_QWORD_VAL(event)); + } + } + +out: + channel->eventq_read_ptr = read_ptr; + return spent; +} + +/* Check whether an event is present in the eventq at the current + * read pointer. Only useful for self-test. + */ +bool efx_nic_event_present(struct efx_channel *channel) +{ + return efx_event_present(efx_event(channel, channel->eventq_read_ptr)); +} + +/* Allocate buffer table entries for event queue */ +int efx_nic_probe_eventq(struct efx_channel *channel) +{ + struct efx_nic *efx = channel->efx; + unsigned entries; + + entries = channel->eventq_mask + 1; + return efx_alloc_special_buffer(efx, &channel->eventq, + entries * sizeof(efx_qword_t)); +} + +void efx_nic_init_eventq(struct efx_channel *channel) +{ + efx_oword_t reg; + struct efx_nic *efx = channel->efx; + + netif_dbg(efx, hw, efx->net_dev, + "channel %d event queue in special buffers %d-%d\n", + channel->channel, channel->eventq.index, + channel->eventq.index + channel->eventq.entries - 1); + + if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) { + EFX_POPULATE_OWORD_3(reg, + |