diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
| -rw-r--r-- | drivers/net/ethernet/mellanox/mlx4/en_tx.c | 111 |
1 files changed, 75 insertions, 36 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index f54ebd5a170..5045bab5963 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -39,23 +39,14 @@ #include <linux/if_vlan.h> #include <linux/vmalloc.h> #include <linux/tcp.h> +#include <linux/ip.h> #include <linux/moduleparam.h> #include "mlx4_en.h" -enum { - MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ - MAX_BF = 256, -}; - -static int inline_thold __read_mostly = MAX_INLINE; - -module_param_named(inline_thold, inline_thold, int, 0444); -MODULE_PARM_DESC(inline_thold, "threshold for using inline data"); - int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, int qpn, u32 size, - u16 stride, int node) + u16 stride, int node, int queue_index) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; @@ -74,8 +65,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->stride = stride; - - inline_thold = min(inline_thold, MAX_INLINE); + ring->inline_thold = priv->prof->inline_thold; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = vmalloc_node(tmp, node); @@ -118,12 +108,12 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf = ring->wqres.buf.direct.buf; - en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d " - "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, - ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); + en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n", + ring, ring->buf, ring->size, ring->buf_size, + (unsigned long long) ring->wqres.buf.direct.map); ring->qpn = qpn; - err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); + err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); if (err) { en_err(priv, "Failed allocating qp %d\n", ring->qpn); goto err_map; @@ -132,7 +122,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, err = mlx4_bf_alloc(mdev->dev, &ring->bf, node); if (err) { - en_dbg(DRV, priv, "working without blueflame (%d)", err); + en_dbg(DRV, priv, "working without blueflame (%d)\n", err); ring->bf.uar = &mdev->priv_uar; ring->bf.uar->map = mdev->uar_map; ring->bf_enabled = false; @@ -140,6 +130,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->bf_enabled = true; ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type; + ring->queue_index = queue_index; + + if (queue_index < priv->num_tx_rings_p_up && cpu_online(queue_index)) + cpumask_set_cpu(queue_index, &ring->affinity_mask); *pring = ring; return 0; @@ -206,6 +200,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, &ring->qp, &ring->qp_state); + if (!user_prio && cpu_online(ring->queue_index)) + netif_set_xps_queue(priv->dev, &ring->affinity_mask, + ring->queue_index); return err; } @@ -354,7 +351,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } -static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) +static bool mlx4_en_process_tx_cq(struct net_device *dev, + struct mlx4_en_cq *cq) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cq *mcq = &cq->mcq; @@ -372,9 +370,11 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) u32 bytes = 0; int factor = priv->cqe_factor; u64 timestamp = 0; + int done = 0; + int budget = priv->tx_work_limit; if (!priv->port_up) - return; + return true; index = cons_index & size_mask; cqe = &buf[(index << factor) + factor]; @@ -383,7 +383,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, - cons_index & size)) { + cons_index & size) && (done < budget)) { /* * make sure we read the CQE after we read the * ownership bit @@ -421,7 +421,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) txbbs_stamp = txbbs_skipped; packets++; bytes += ring->tx_info[ring_index].nr_bytes; - } while (ring_index != new_index); + } while ((++done < budget) && (ring_index != new_index)); ++cons_index; index = cons_index & size_mask; @@ -445,8 +445,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) */ if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) { netif_tx_wake_queue(ring->tx_queue); - priv->port_stats.wake_queue++; + ring->wake_queue++; } + return done < budget; } void mlx4_en_tx_irq(struct mlx4_cq *mcq) @@ -454,10 +455,29 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq) struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); struct mlx4_en_priv *priv = netdev_priv(cq->dev); - mlx4_en_process_tx_cq(cq->dev, cq); - mlx4_en_arm_cq(priv, cq); + if (priv->port_up) + napi_schedule(&cq->napi); + else + mlx4_en_arm_cq(priv, cq); } +/* TX CQ polling - called by NAPI */ +int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget) +{ + struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + int clean_complete; + + clean_complete = mlx4_en_process_tx_cq(dev, cq); + if (!clean_complete) + return budget; + + napi_complete(napi); + mlx4_en_arm_cq(priv, cq); + + return 0; +} static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, @@ -487,7 +507,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, return ring->buf + index * TXBB_SIZE; } -static int is_inline(struct sk_buff *skb, void **pfrag) +static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag) { void *ptr; @@ -528,7 +548,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev, int real_size; if (skb_is_gso(skb)) { - *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb); + if (skb->encapsulation) + *lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb); + else + *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb); real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE + ALIGN(*lso_header_size + 4, DS_SIZE); if (unlikely(*lso_header_size != skb_headlen(skb))) { @@ -544,7 +567,7 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev, } } else { *lso_header_size = 0; - if (!is_inline(skb, NULL)) + if (!is_inline(priv->prof->inline_thold, skb, NULL)) real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE; else real_size = inline_size(skb); @@ -560,7 +583,13 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; if (skb->len <= spc) { - inl->byte_count = cpu_to_be32(1 << 31 | skb->len); + if (likely(skb->len >= MIN_PKT_LEN)) { + inl->byte_count = cpu_to_be32(1 << 31 | skb->len); + } else { + inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN); + memset(((void *)(inl + 1)) + skb->len, 0, + MIN_PKT_LEN - skb->len); + } skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb)); if (skb_shinfo(skb)->nr_frags) memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr, @@ -592,7 +621,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk } } -u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) +u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) { struct mlx4_en_priv *priv = netdev_priv(dev); u16 rings_p_up = priv->num_tx_rings_p_up; @@ -604,7 +634,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) if (vlan_tx_tag_present(skb)) up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; - return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up; + return fallback(dev, skb) % rings_p_up + up * rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) @@ -659,7 +689,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->size - HEADROOM - MAX_DESC_TXBBS)) { /* every full Tx ring stops queue */ netif_tx_stop_queue(ring->tx_queue); - priv->port_stats.queue_stopped++; + ring->queue_stopped++; /* If queue was emptied after the if, and before the * stop_queue - need to wake the queue, or else it will remain @@ -672,7 +702,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(((int)(ring->prod - ring->cons)) <= ring->size - HEADROOM - MAX_DESC_TXBBS)) { netif_tx_wake_queue(ring->tx_queue); - priv->port_stats.wake_queue++; + ring->wake_queue++; } else { return NETDEV_TX_BUSY; } @@ -710,11 +740,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->data_offset = (void *)data - (void *)tx_desc; tx_info->linear = (lso_header_size < skb_headlen(skb) && - !is_inline(skb, NULL)) ? 1 : 0; + !is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0; data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1; - if (is_inline(skb, &fragptr)) { + if (is_inline(ring->inline_thold, skb, &fragptr)) { tx_info->inl = 1; } else { /* Map fragments */ @@ -827,6 +857,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_info->inl = 1; } + if (skb->encapsulation) { + struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb); + if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP) + op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP); + else + op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); + } + ring->prod += nr_txbb; /* If we used a bounce buffer then copy descriptor back into place */ @@ -836,7 +874,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tx_tag_present(skb)) { - *(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn); + tx_desc->ctrl.bf_qpn |= cpu_to_be32(ring->doorbell_qpn); + op_own |= htonl((bf_index & 0xffff) << 8); /* Ensure new descirptor hits memory * before setting ownership of this descriptor to HW */ |
