diff options
Diffstat (limited to 'drivers/net/ixgbe/ixgbe_main.c')
-rw-r--r-- | drivers/net/ixgbe/ixgbe_main.c | 1520 |
1 files changed, 1176 insertions, 344 deletions
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index c2095ce531c..cb371a8c24a 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -48,7 +48,7 @@ char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = "Intel(R) 10 Gigabit PCI Express Network Driver"; -#define DRV_VERSION "1.1.18" +#define DRV_VERSION "1.3.18-k2" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2007 Intel Corporation."; @@ -80,6 +80,16 @@ static struct pci_device_id ixgbe_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl); +#ifdef CONFIG_DCA +static int ixgbe_notify_dca(struct notifier_block *, unsigned long event, + void *p); +static struct notifier_block dca_notifier = { + .notifier_call = ixgbe_notify_dca, + .next = NULL, + .priority = 0 +}; +#endif + MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL"); @@ -256,26 +266,125 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_adapter *adapter, * sees the new next_to_clean. */ smp_mb(); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && + !test_bit(__IXGBE_DOWN, &adapter->state)) { + netif_wake_subqueue(netdev, tx_ring->queue_index); + adapter->restart_queue++; + } +#else if (netif_queue_stopped(netdev) && !test_bit(__IXGBE_DOWN, &adapter->state)) { netif_wake_queue(netdev); adapter->restart_queue++; } +#endif } if (adapter->detect_tx_hung) if (ixgbe_check_tx_hang(adapter, tx_ring, eop, eop_desc)) +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(netdev, tx_ring->queue_index); +#else netif_stop_queue(netdev); +#endif if (total_tx_packets >= tx_ring->work_limit) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, tx_ring->eims_value); + tx_ring->total_bytes += total_tx_bytes; + tx_ring->total_packets += total_tx_packets; adapter->net_stats.tx_bytes += total_tx_bytes; adapter->net_stats.tx_packets += total_tx_packets; cleaned = total_tx_packets ? true : false; return cleaned; } +#ifdef CONFIG_DCA +static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *rxr) +{ + u32 rxctrl; + int cpu = get_cpu(); + int q = rxr - adapter->rx_ring; + + if (rxr->cpu != cpu) { + rxctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_DCA_RXCTRL(q)); + rxctrl &= ~IXGBE_DCA_RXCTRL_CPUID_MASK; + rxctrl |= dca_get_tag(cpu); + rxctrl |= IXGBE_DCA_RXCTRL_DESC_DCA_EN; + rxctrl |= IXGBE_DCA_RXCTRL_HEAD_DCA_EN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_RXCTRL(q), rxctrl); + rxr->cpu = cpu; + } + put_cpu(); +} + +static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter, + struct ixgbe_ring *txr) +{ + u32 txctrl; + int cpu = get_cpu(); + int q = txr - adapter->tx_ring; + + if (txr->cpu != cpu) { + txctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_DCA_TXCTRL(q)); + txctrl &= ~IXGBE_DCA_TXCTRL_CPUID_MASK; + txctrl |= dca_get_tag(cpu); + txctrl |= IXGBE_DCA_TXCTRL_DESC_DCA_EN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_TXCTRL(q), txctrl); + txr->cpu = cpu; + } + put_cpu(); +} + +static void ixgbe_setup_dca(struct ixgbe_adapter *adapter) +{ + int i; + + if (!(adapter->flags & IXGBE_FLAG_DCA_ENABLED)) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) { + adapter->tx_ring[i].cpu = -1; + ixgbe_update_tx_dca(adapter, &adapter->tx_ring[i]); + } + for (i = 0; i < adapter->num_rx_queues; i++) { + adapter->rx_ring[i].cpu = -1; + ixgbe_update_rx_dca(adapter, &adapter->rx_ring[i]); + } +} + +static int __ixgbe_notify_dca(struct device *dev, void *data) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct ixgbe_adapter *adapter = netdev_priv(netdev); + unsigned long event = *(unsigned long *)data; + + switch (event) { + case DCA_PROVIDER_ADD: + adapter->flags |= IXGBE_FLAG_DCA_ENABLED; + /* Always use CB2 mode, difference is masked + * in the CB driver. */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 2); + if (dca_add_requester(dev) == 0) { + ixgbe_setup_dca(adapter); + break; + } + /* Fall Through since DCA is disabled. */ + case DCA_PROVIDER_REMOVE: + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + dca_remove_requester(dev); + adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL, 1); + } + break; + } + + return 0; +} + +#endif /* CONFIG_DCA */ /** * ixgbe_receive_skb - Send a completed packet up the stack * @adapter: board private structure @@ -556,10 +665,15 @@ next_desc: adapter->net_stats.rx_bytes += total_rx_bytes; adapter->net_stats.rx_packets += total_rx_packets; + rx_ring->total_packets += total_rx_packets; + rx_ring->total_bytes += total_rx_bytes; + adapter->net_stats.rx_bytes += total_rx_bytes; + adapter->net_stats.rx_packets += total_rx_packets; + return cleaned; } -#define IXGBE_MAX_INTR 10 +static int ixgbe_clean_rxonly(struct napi_struct *, int); /** * ixgbe_configure_msix - Configure MSI-X hardware * @adapter: board private structure @@ -569,28 +683,195 @@ next_desc: **/ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) { - int i, vector = 0; + struct ixgbe_q_vector *q_vector; + int i, j, q_vectors, v_idx, r_idx; + u32 mask; - for (i = 0; i < adapter->num_tx_queues; i++) { - ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(i), - IXGBE_MSIX_VECTOR(vector)); - writel(EITR_INTS_PER_SEC_TO_REG(adapter->tx_eitr), - adapter->hw.hw_addr + adapter->tx_ring[i].itr_register); - vector++; - } + q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - for (i = 0; i < adapter->num_rx_queues; i++) { - ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(i), - IXGBE_MSIX_VECTOR(vector)); - writel(EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr), - adapter->hw.hw_addr + adapter->rx_ring[i].itr_register); - vector++; + /* Populate the IVAR table and set the ITR values to the + * corresponding register. + */ + for (v_idx = 0; v_idx < q_vectors; v_idx++) { + q_vector = &adapter->q_vector[v_idx]; + /* XXX for_each_bit(...) */ + r_idx = find_first_bit(q_vector->rxr_idx, + adapter->num_rx_queues); + + for (i = 0; i < q_vector->rxr_count; i++) { + j = adapter->rx_ring[r_idx].reg_idx; + ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(j), v_idx); + r_idx = find_next_bit(q_vector->rxr_idx, + adapter->num_rx_queues, + r_idx + 1); + } + r_idx = find_first_bit(q_vector->txr_idx, + adapter->num_tx_queues); + + for (i = 0; i < q_vector->txr_count; i++) { + j = adapter->tx_ring[r_idx].reg_idx; + ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(j), v_idx); + r_idx = find_next_bit(q_vector->txr_idx, + adapter->num_tx_queues, + r_idx + 1); + } + + /* if this is a tx only vector use half the irq (tx) rate */ + if (q_vector->txr_count && !q_vector->rxr_count) + q_vector->eitr = adapter->tx_eitr; + else + /* rx only or mixed */ + q_vector->eitr = adapter->rx_eitr; + + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), + EITR_INTS_PER_SEC_TO_REG(q_vector->eitr)); } - vector = adapter->num_tx_queues + adapter->num_rx_queues; - ixgbe_set_ivar(adapter, IXGBE_IVAR_OTHER_CAUSES_INDEX, - IXGBE_MSIX_VECTOR(vector)); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(vector), 1950); + ixgbe_set_ivar(adapter, IXGBE_IVAR_OTHER_CAUSES_INDEX, v_idx); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950); + + /* set up to autoclear timer, lsc, and the vectors */ + mask = IXGBE_EIMS_ENABLE_MASK; + mask &= ~IXGBE_EIMS_OTHER; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); +} + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +/** + * ixgbe_update_itr - update the dynamic ITR value based on statistics + * @adapter: pointer to adapter + * @eitr: eitr setting (ints per sec) to give last timeslice + * @itr_setting: current throttle rate in ints/second + * @packets: the number of packets during this measurement interval + * @bytes: the number of bytes during this measurement interval + * + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see ixgbe_param.c) + **/ +static u8 ixgbe_update_itr(struct ixgbe_adapter *adapter, + u32 eitr, u8 itr_setting, + int packets, int bytes) +{ + unsigned int retval = itr_setting; + u32 timepassed_us; + u64 bytes_perint; + + if (packets == 0) + goto update_itr_done; + + + /* simple throttlerate management + * 0-20MB/s lowest (100000 ints/s) + * 20-100MB/s low (20000 ints/s) + * 100-1249MB/s bulk (8000 ints/s) + */ + /* what was last interrupt timeslice? */ + timepassed_us = 1000000/eitr; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ + + switch (itr_setting) { + case lowest_latency: + if (bytes_perint > adapter->eitr_low) + retval = low_latency; + break; + case low_latency: + if (bytes_perint > adapter->eitr_high) + retval = bulk_latency; + else if (bytes_perint <= adapter->eitr_low) + retval = lowest_latency; + break; + case bulk_latency: + if (bytes_perint <= adapter->eitr_high) + retval = low_latency; + break; + } + +update_itr_done: + return retval; +} + +static void ixgbe_set_itr_msix(struct ixgbe_q_vector *q_vector) +{ + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_hw *hw = &adapter->hw; + u32 new_itr; + u8 current_itr, ret_itr; + int i, r_idx, v_idx = ((void *)q_vector - (void *)(adapter->q_vector)) / + sizeof(struct ixgbe_q_vector); + struct ixgbe_ring *rx_ring, *tx_ring; + + r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); + for (i = 0; i < q_vector->txr_count; i++) { + tx_ring = &(adapter->tx_ring[r_idx]); + ret_itr = ixgbe_update_itr(adapter, q_vector->eitr, + q_vector->tx_eitr, + tx_ring->total_packets, + tx_ring->total_bytes); + /* if the result for this queue would decrease interrupt + * rate for this vector then use that result */ + q_vector->tx_eitr = ((q_vector->tx_eitr > ret_itr) ? + q_vector->tx_eitr - 1 : ret_itr); + r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, + r_idx + 1); + } + + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + for (i = 0; i < q_vector->rxr_count; i++) { + rx_ring = &(adapter->rx_ring[r_idx]); + ret_itr = ixgbe_update_itr(adapter, q_vector->eitr, + q_vector->rx_eitr, + rx_ring->total_packets, + rx_ring->total_bytes); + /* if the result for this queue would decrease interrupt + * rate for this vector then use that result */ + q_vector->rx_eitr = ((q_vector->rx_eitr > ret_itr) ? + q_vector->rx_eitr - 1 : ret_itr); + r_idx = find_next_bit(q_vector->rxr_idx, adapter->num_rx_queues, + r_idx + 1); + } + + current_itr = max(q_vector->rx_eitr, q_vector->tx_eitr); + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = 100000; + break; + case low_latency: + new_itr = 20000; /* aka hwitr = ~200 */ + break; + case bulk_latency: + default: + new_itr = 8000; + break; + } + + if (new_itr != q_vector->eitr) { + u32 itr_reg; + /* do an exponential smoothing */ + new_itr = ((q_vector->eitr * 90)/100) + ((new_itr * 10)/100); + q_vector->eitr = new_itr; + itr_reg = EITR_INTS_PER_SEC_TO_REG(new_itr); + /* must write high and low 16 bits to reset counter */ + DPRINTK(TX_ERR, DEBUG, "writing eitr(%d): %08X\n", v_idx, + itr_reg); + IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg | (itr_reg)<<16); + } + + return; } static irqreturn_t ixgbe_msix_lsc(int irq, void *data) @@ -614,153 +895,302 @@ static irqreturn_t ixgbe_msix_lsc(int irq, void *data) static irqreturn_t ixgbe_msix_clean_tx(int irq, void *data) { - struct ixgbe_ring *txr = data; - struct ixgbe_adapter *adapter = txr->adapter; + struct ixgbe_q_vector *q_vector = data; + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *txr; + int i, r_idx; - ixgbe_clean_tx_irq(adapter, txr); + if (!q_vector->txr_count) + return IRQ_HANDLED; + + r_idx = find_first_bit(q_vector->txr_idx, adapter->num_tx_queues); + for (i = 0; i < q_vector->txr_count; i++) { + txr = &(adapter->tx_ring[r_idx]); +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) + ixgbe_update_tx_dca(adapter, txr); +#endif + txr->total_bytes = 0; + txr->total_packets = 0; + ixgbe_clean_tx_irq(adapter, txr); + r_idx = find_next_bit(q_vector->txr_idx, adapter->num_tx_queues, + r_idx + 1); + } return IRQ_HANDLED; } +/** + * ixgbe_msix_clean_rx - single unshared vector rx clean (all queues) + * @irq: unused + * @data: pointer to our q_vector struct for this interrupt vector + **/ static irqreturn_t ixgbe_msix_clean_rx(int irq, void *data) { - struct ixgbe_ring *rxr = data; - struct ixgbe_adapter *adapter = rxr->adapter; + struct ixgbe_q_vector *q_vector = data; + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *rxr; + int r_idx; + + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + if (!q_vector->rxr_count) + return IRQ_HANDLED; + + rxr = &(adapter->rx_ring[r_idx]); + /* disable interrupts on this vector only */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->v_idx); + rxr->total_bytes = 0; + rxr->total_packets = 0; + netif_rx_schedule(adapter->netdev, &q_vector->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t ixgbe_msix_clean_many(int irq, void *data) +{ + ixgbe_msix_clean_rx(irq, data); + ixgbe_msix_clean_tx(irq, data); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, rxr->eims_value); - netif_rx_schedule(adapter->netdev, &adapter->napi); return IRQ_HANDLED; } +/** + * ixgbe_clean_rxonly - msix (aka one shot) rx clean routine + * @napi: napi struct with our devices info in it + * @budget: amount of work driver is allowed to do this pass, in packets + * + **/ static int ixgbe_clean_rxonly(struct napi_struct *napi, int budget) { - struct ixgbe_adapter *adapter = container_of(napi, - struct ixgbe_adapter, napi); - struct net_device *netdev = adapter->netdev; + struct ixgbe_q_vector *q_vector = + container_of(napi, struct ixgbe_q_vector, napi); + struct ixgbe_adapter *adapter = q_vector->adapter; + struct ixgbe_ring *rxr; int work_done = 0; - struct ixgbe_ring *rxr = adapter->rx_ring; + long r_idx; - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(netdev)) - goto quit_polling; + r_idx = find_first_bit(q_vector->rxr_idx, adapter->num_rx_queues); + rxr = &(adapter->rx_ring[r_idx]); +#ifdef CONFIG_DCA + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) + ixgbe_update_rx_dca(adapter, rxr); +#endif ixgbe_clean_rx_irq(adapter, rxr, &work_done, budget); - /* If no Tx and not enough Rx work done, exit the polling mode */ - if ((work_done < budget) || !netif_running(netdev)) { -quit_polling: - netif_rx_complete(netdev, napi); + /* If all Rx work done, exit the polling mode */ + if (work_done < budget) { + netif_rx_complete(adapter->netdev, napi); + if (adapter->rx_eitr < IXGBE_MIN_ITR_USECS) + ixgbe_set_itr_msix(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state)) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, - rxr->eims_value); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, rxr->v_idx); } return work_done; } +static inline void map_vector_to_rxq(struct ixgbe_adapter *a, int v_idx, + int r_idx) +{ + a->q_vector[v_idx].adapter = a; + set_bit(r_idx, a->q_vector[v_idx].rxr_idx); + a->q_vector[v_idx].rxr_count++; + a->rx_ring[r_idx].v_idx = 1 << v_idx; +} + +static inline void map_vector_to_txq(struct ixgbe_adapter *a, int v_idx, + int r_idx) +{ + a->q_vector[v_idx].adapter = a; + set_bit(r_idx, a->q_vector[v_idx].txr_idx); + a->q_vector[v_idx].txr_count++; + a->tx_ring[r_idx].v_idx = 1 << v_idx; +} + /** - * ixgbe_setup_msix - Initialize MSI-X interrupts + * ixgbe_map_rings_to_vectors - Maps descriptor rings to vectors + * @adapter: board private structure to initialize + * @vectors: allotted vector count for descriptor rings * - * ixgbe_setup_msix allocates MSI-X vectors and requests - * interrutps from the kernel. + * This function maps descriptor rings to the queue-specific vectors + * we were allotted through the MSI-X enabling code. Ideally, we'd have + * one vector per ring/queue, but on a constrained vector budget, we + * group the rings as "efficiently" as possible. You would add new + * mapping configurations in here. **/ -static int ixgbe_setup_msix(struct ixgbe_adapter *adapter) -{ - struct net_device *netdev = adapter->netdev; - int i, int_vector = 0, err = 0; - int max_msix_count; +static int ixgbe_map_rings_to_vectors(struct ixgbe_adapter *adapter, + int vectors) +{ + int v_start = 0; + int rxr_idx = 0, txr_idx = 0; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int i, j; + int rqpv, tqpv; + int err = 0; + + /* No mapping required if MSI-X is disabled. */ + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) + goto out; - /* +1 for the LSC interrupt */ - max_msix_count = adapter->num_rx_queues + adapter->num_tx_queues + 1; - adapter->msix_entries = kcalloc(max_msix_count, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) - return -ENOMEM; + /* + * The ideal configuration... + * We have enough vectors to map one per queue. + */ + if (vectors == adapter->num_rx_queues + adapter->num_tx_queues) { + for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) + map_vector_to_rxq(adapter, v_start, rxr_idx); - for (i = 0; i < max_msix_count; i++) - adapter->msix_entries[i].entry = i; + for (; txr_idx < txr_remaining; v_start++, txr_idx++) + map_vector_to_txq(adapter, v_start, txr_idx); - err = pci_enable_msix(adapter->pdev, adapter->msix_entries, - max_msix_count); - if (err) goto out; + } - for (i = 0; i < adapter->num_tx_queues; i++) { - sprintf(adapter->tx_ring[i].name, "%s-tx%d", netdev->name, i); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_clean_tx, - 0, - adapter->tx_ring[i].name, - &(adapter->tx_ring[i])); - if (err) { - DPRINTK(PROBE, ERR, - "request_irq failed for MSIX interrupt " - "Error: %d\n", err); - goto release_irqs; + /* + * If we don't have enough vectors for a 1-to-1 + * mapping, we'll have to group them so there are + * multiple queues per vector. + */ + /* Re-adjusting *qpv takes care of the remainder. */ + for (i = v_start; i < vectors; i++) { + rqpv = DIV_ROUND_UP(rxr_remaining, vectors - i); + for (j = 0; j < rqpv; j++) { + map_vector_to_rxq(adapter, i, rxr_idx); + rxr_idx++; + rxr_remaining--; + } + } + for (i = v_start; i < vectors; i++) { + tqpv = DIV_ROUND_UP(txr_remaining, vectors - i); + for (j = 0; j < tqpv; j++) { + map_vector_to_txq(adapter, i, txr_idx); + txr_idx++; + txr_remaining--; } - adapter->tx_ring[i].eims_value = - (1 << IXGBE_MSIX_VECTOR(int_vector)); - adapter->tx_ring[i].itr_register = IXGBE_EITR(int_vector); - int_vector++; } - for (i = 0; i < adapter->num_rx_queues; i++) { - if (strlen(netdev->name) < (IFNAMSIZ - 5)) - sprintf(adapter->rx_ring[i].name, - "%s-rx%d", netdev->name, i); - else - memcpy(adapter->rx_ring[i].name, - netdev->name, IFNAMSIZ); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_clean_rx, 0, - adapter->rx_ring[i].name, - &(adapter->rx_ring[i])); +out: + return err; +} + +/** + * ixgbe_request_msix_irqs - Initialize MSI-X interrupts + * @adapter: board private structure + * + * ixgbe_request_msix_irqs allocates MSI-X vectors and requests + * interrupts from the kernel. + **/ +static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + irqreturn_t (*handler)(int, void *); + int i, vector, q_vectors, err; + + /* Decrement for Other and TCP Timer vectors */ + q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + /* Map the Tx/Rx rings to the vectors we were allotted. */ + err = ixgbe_map_rings_to_vectors(adapter, q_vectors); + if (err) + goto out; + +#define SET_HANDLER(_v) ((!(_v)->rxr_count) ? &ixgbe_msix_clean_tx : \ + (!(_v)->txr_count) ? &ixgbe_msix_clean_rx : \ + &ixgbe_msix_clean_many) + for (vector = 0; vector < q_vectors; vector++) { + handler = SET_HANDLER(&adapter->q_vector[vector]); + sprintf(adapter->name[vector], "%s:v%d-%s", + netdev->name, vector, + (handler == &ixgbe_msix_clean_rx) ? "Rx" : + ((handler == &ixgbe_msix_clean_tx) ? "Tx" : "TxRx")); + err = request_irq(adapter->msix_entries[vector].vector, + handler, 0, adapter->name[vector], + &(adapter->q_vector[vector])); if (err) { DPRINTK(PROBE, ERR, "request_irq failed for MSIX interrupt " "Error: %d\n", err); - goto release_irqs; + goto free_queue_irqs; } - - adapter->rx_ring[i].eims_value = - (1 << IXGBE_MSIX_VECTOR(int_vector)); - adapter->rx_ring[i].itr_register = IXGBE_EITR(int_vector); - int_vector++; } - sprintf(adapter->lsc_name, "%s-lsc", netdev->name); - err = request_irq(adapter->msix_entries[int_vector].vector, - &ixgbe_msix_lsc, 0, adapter->lsc_name, netdev); + sprintf(adapter->name[vector], "%s:lsc", netdev->name); + err = request_irq(adapter->msix_entries[vector].vector, + &ixgbe_msix_lsc, 0, adapter->name[vector], netdev); if (err) { DPRINTK(PROBE, ERR, "request_irq for msix_lsc failed: %d\n", err); - goto release_irqs; + goto free_queue_irqs; } - /* FIXME: implement netif_napi_remove() instead */ - adapter->napi.poll = ixgbe_clean_rxonly; - adapter->flags |= IXGBE_FLAG_MSIX_ENABLED; return 0; -release_irqs: - int_vector--; - for (; int_vector >= adapter->num_tx_queues; int_vector--) - free_irq(adapter->msix_entries[int_vector].vector, - &(adapter->rx_ring[int_vector - - adapter->num_tx_queues])); - - for (; int_vector >= 0; int_vector--) - free_irq(adapter->msix_entries[int_vector].vector, - &(adapter->tx_ring[int_vector])); -out: +free_queue_irqs: + for (i = vector - 1; i >= 0; i--) + free_irq(adapter->msix_entries[--vector].vector, + &(adapter->q_vector[i])); + adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; + pci_disable_msix(adapter->pdev); kfree(adapter->msix_entries); adapter->msix_entries = NULL; - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; +out: return err; } +static void ixgbe_set_itr(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_q_vector *q_vector = adapter->q_vector; + u8 current_itr; + u32 new_itr = q_vector->eitr; + struct ixgbe_ring *rx_ring = &adapter->rx_ring[0]; + struct ixgbe_ring *tx_ring = &adapter->tx_ring[0]; + + q_vector->tx_eitr = ixgbe_update_itr(adapter, new_itr, + q_vector->tx_eitr, + tx_ring->total_packets, + tx_ring->total_bytes); + q_vector->rx_eitr = ixgbe_update_itr(adapter, new_itr, + q_vector->rx_eitr, + rx_ring->total_packets, + rx_ring->total_bytes); + + current_itr = max(q_vector->rx_eitr, q_vector->tx_eitr); + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = 100000; + break; + case low_latency: + new_itr = 20000; /* aka hwitr = ~200 */ + break; + case bulk_latency: + new_itr = 8000; + break; + default: + break; + } + + if (new_itr != q_vector->eitr) { + u32 itr_reg; + /* do an exponential smoothing */ + new_itr = ((q_vector->eitr * 90)/100) + ((new_itr * 10)/100); + q_vector->eitr = new_itr; + itr_reg = EITR_INTS_PER_SEC_TO_REG(new_itr); + /* must write high and low 16 bits to reset counter */ + IXGBE_WRITE_REG(hw, IXGBE_EITR(0), itr_reg | (itr_reg)<<16); + } + + return; +} + +static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter); + /** - * ixgbe_intr - Interrupt Handler + * ixgbe_intr - legacy mode Interrupt Handler * @irq: interrupt number * @data: pointer to a network interface device structure * @pt_regs: CPU registers structure @@ -772,8 +1202,10 @@ static irqreturn_t ixgbe_intr(int irq, void *data) struct ixgbe_hw *hw = &adapter->hw; u32 eicr; - eicr = IXGBE_READ_REG(hw, IXGBE_EICR); + /* for NAPI, using EIAM to auto-mask tx/rx interrupt bits on read + * therefore no explict interrupt disable is necessary */ + eicr = IXGBE_READ_REG(hw, IXGBE_EICR); if (!eicr) return IRQ_NONE; /* Not our interrupt */ @@ -782,16 +1214,33 @@ static irqreturn_t ixgbe_intr(int irq, void *data) if (!test_bit(__IXGBE_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies); } - if (netif_rx_schedule_prep(netdev, &adapter->napi)) { - /* Disable interrupts and register for poll. The flush of the - * posted write is intentionally left out. */ - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); - __netif_rx_schedule(netdev, &adapter->napi); + + + if (netif_rx_schedule_prep(netdev, &adapter->q_vector[0].napi)) { + adapter->tx_ring[0].total_packets = 0; + adapter->tx_ring[0].total_bytes = 0; + adapter->rx_ring[0].total_packets = 0; + adapter->rx_ring[0].total_bytes = 0; + /* would disable interrupts here but EIAM disabled it */ + __netif_rx_schedule(netdev, &adapter->q_vector[0].napi); } return IRQ_HANDLED; } +static inline void ixgbe_reset_q_vectors(struct ixgbe_adapter *adapter) +{ + int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + + for (i = 0; i < q_vectors; i++) { + struct ixgbe_q_vector *q_vector = &adapter->q_vector[i]; + bitmap_zero(q_vector->rxr_idx, MAX_RX_QUEUES); + bitmap_zero(q_vector->txr_idx, MAX_TX_QUEUES); + q_vector->rxr_count = 0; + q_vector->txr_count = 0; + } +} + /** * ixgbe_request_irq - initialize interrupts * @adapter: board private structure @@ -799,40 +1248,24 @@ static irqreturn_t ixgbe_intr(int irq, void *data) * Attempts to configure interrupts using the best available * capabilities of the hardware and kernel. **/ -static int ixgbe_request_irq(struct ixgbe_adapter *adapter, u32 *num_rx_queues) +static int ixgbe_request_irq(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; - int flags, err; - irq_handler_t handler = ixgbe_intr; - - flags = IRQF_SHARED; - - err = ixgbe_setup_msix(adapter); - if (!err) - goto request_done; - - /* - * if we can't do MSI-X, fall through and try MSI - * No need to reallocate memory since we're decreasing the number of - * queues. We just won't use the other ones, also it is freed correctly - * on ixgbe_remove. - */ - *num_rx_queues = 1; + int err; - /* do MSI */ - err = pci_enable_msi(adapter->pdev); - if (!err) { - adapter->flags |= IXGBE_FLAG_MSI_ENABLED; - flags &= ~IRQF_SHARED; - handler = &ixgbe_intr; + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + err = ixgbe_request_msix_irqs(adapter); + } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { + err = request_irq(adapter->pdev->irq, &ixgbe_intr, 0, + netdev->name, netdev); + } else { + err = request_irq(adapter->pdev->irq, &ixgbe_intr, IRQF_SHARED, + netdev->name, netdev); } - err = request_irq(adapter->pdev->irq, handler, flags, - netdev->name, netdev); if (err) DPRINTK(PROBE, ERR, "request_irq failed, Error %d\n", err); -request_done: return err; } @@ -841,28 +1274,22 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter) struct net_device *netdev = adapter->netdev; if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { - int i; + int i, q_vectors; - for (i = 0; i < adapter->num_tx_queues; i++) - free_irq(adapter->msix_entries[i].vector, - &(adapter->tx_ring[i])); - for (i = 0; i < adapter->num_rx_queues; i++) - free_irq(adapter->msix_entries[i + - adapter->num_tx_queues].vector, - &(adapter->rx_ring[i])); - i = adapter->num_rx_queues + adapter->num_tx_queues; + q_vectors = adapter->num_msix_vectors; + + i = q_vectors - 1; free_irq(adapter->msix_entries[i].vector, netdev); - pci_disable_msix(adapter->pdev); - kfree(adapter->msix_entries); - adapter->msix_entries = NULL; - adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; - return; - } - free_irq(adapter->pdev->irq, netdev); - if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { - pci_disable_msi(adapter->pdev); - adapter->flags &= ~IXGBE_FLAG_MSI_ENABLED; + i--; + for (; i >= 0; i--) { + free_irq(adapter->msix_entries[i].vector, + &(adapter->q_vector[i])); + } + + ixgbe_reset_q_vectors(adapter); + } else { + free_irq(adapter->pdev->irq, netdev); } } @@ -874,7 +1301,13 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); IXGBE_WRITE_FLUSH(&adapter->hw); - synchronize_irq(adapter->pdev->irq); + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { + int i; + for (i = 0; i < adapter->num_msix_vectors; i++) + synchronize_irq(adapter->msix_entries[i].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } } /** @@ -883,12 +1316,9 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) **/ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter) { - if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, - (IXGBE_EIMS_ENABLE_MASK & - ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC))); - IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, - IXGBE_EIMS_ENABLE_MASK); + u32 mask; + mask = IXGBE_EIMS_ENABLE_MASK; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); IXGBE_WRITE_FLUSH(&adapter->hw); } @@ -898,20 +1328,18 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter) **/ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) { - int i; struct ixgbe_hw *hw = &adapter->hw; - if (adapter->rx_eitr) - IXGBE_WRITE_REG(hw, IXGBE_EITR(0), - EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr)); - - /* for re-triggering the interrupt in non-NAPI mode */ - adapter->rx_ring[0].eims_value = (1 << IXGBE_MSIX_VECTOR(0)); - adapter->tx_ring[0].eims_value = (1 << IXGBE_MSIX_VECTOR(0)); + IXGBE_WRITE_REG(hw, IXGBE_EITR(0), + EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr)); ixgbe_set_ivar(adapter, IXGBE_IVAR_RX_QUEUE(0), 0); - for (i = 0; i < adapter->num_tx_queues; i++) - ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(i), i); + ixgbe_set_ivar(adapter, IXGBE_IVAR_TX_QUEUE(0), 0); + + map_vector_to_rxq(adapter, 0, 0); + map_vector_to_txq(adapter, 0, 0); + + DPRINTK(HW, INFO, "Legacy interrupt IVAR setup done\n"); } /** @@ -924,23 +1352,29 @@ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter) { u64 tdba; struct ixgbe_hw *hw = &adapter->hw; - u32 i, tdlen; + u32 i, j, tdlen, txctrl; /* Setup the HW Tx Head and Tail descriptor pointers */ for (i = 0; i < adapter->num_tx_queues; i++) { + j = adapter->tx_ring[i].reg_idx; tdba = adapter->tx_ring[i].dma; tdlen = adapter->tx_ring[i].count * - sizeof(union ixgbe_adv_tx_desc); - IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i), (tdba & DMA_32BIT_MASK)); - IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32)); - IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i), tdlen); - IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0); - IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0); - adapter->tx_ring[i].head = IXGBE_TDH(i); - adapter->tx_ring[i].tail = IXGBE_TDT(i); + sizeof(union ixgbe_adv_tx_desc); + IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), + (tdba & DMA_32BIT_MASK)); + IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); + IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), tdlen); + IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); + IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); + adapter->tx_ring[i].head = IXGBE_TDH(j); + adapter->tx_ring[i].tail = IXGBE_TDT(j); + /* Disable Tx Head Writeback RO bit, since this hoses + * bookkeeping if things aren't delivered in order. + */ + txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + txctrl &= ~IXGBE_DCA_TXCTRL_TX_WB_RO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl); } - - IXGBE_WRITE_REG(hw, IXGBE_TIPG, IXGBE_TIPG_FIBER_DEFAULT); } #define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \ @@ -959,13 +1393,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + int i, j; u32 rdlen, rxctrl, rxcsum; u32 random[10]; - u32 reta, mrqc; - int i; u32 fctrl, hlreg0; - u32 srrctl; u32 pages; + u32 reta = 0, mrqc, srrctl; /* Decide whether to use packet split mode or not */ if (netdev->mtu > ETH_DATA_LEN) @@ -985,6 +1418,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; + fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); @@ -1036,37 +1470,23 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) adapter->rx_ring[i].tail = IXGBE_RDT(i); } - if (adapter->num_rx_queues > 1) { - /* Random 40bytes used as random key in RSS hash function */ - get_random_bytes(&random[0], 40); - - switch (adapter->num_rx_queues) { - case 8: - case 4: - /* Bits [3:0] in each byte refers the Rx queue no */ - reta = 0x00010203; - break; - case 2: - reta = 0x00010001; - break; - default: - reta = 0x00000000; - break; - } - + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { /* Fill out redirection table */ - for (i = 0; i < 32; i++) { - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RETA(0), i, reta); - if (adapter->num_rx_queues > 4) { - i++; - IXGBE_WRITE_REG_ARRAY(hw, IXGBE_RETA(0), i, - 0x04050607); - } + for (i = 0, j = 0; i < 128; i++, j++) { + if (j == adapter->ring_feature[RING_F_RSS].indices) + j = 0; + / |