Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx4/en_tx.c')
-rw-r--r--   drivers/net/ethernet/mellanox/mlx4/en_tx.c | 163
1 file changed, 115 insertions(+), 48 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 0698c82d6ff..5045bab5963 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -39,51 +39,62 @@
 #include <linux/if_vlan.h>
 #include <linux/vmalloc.h>
 #include <linux/tcp.h>
+#include <linux/ip.h>
 #include <linux/moduleparam.h>
 
 #include "mlx4_en.h"
 
-enum {
-	MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */
-	MAX_BF = 256,
-};
-
-static int inline_thold __read_mostly = MAX_INLINE;
-
-module_param_named(inline_thold, inline_thold, int, 0444);
-MODULE_PARM_DESC(inline_thold, "threshold for using inline data");
-
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
-			   struct mlx4_en_tx_ring *ring, int qpn, u32 size,
-			   u16 stride)
+			   struct mlx4_en_tx_ring **pring, int qpn, u32 size,
+			   u16 stride, int node, int queue_index)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_ring *ring;
 	int tmp;
 	int err;
 
+	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
+	if (!ring) {
+		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+		if (!ring) {
+			en_err(priv, "Failed allocating TX ring\n");
+			return -ENOMEM;
+		}
+	}
+
 	ring->size = size;
 	ring->size_mask = size - 1;
 	ring->stride = stride;
-
-	inline_thold = min(inline_thold, MAX_INLINE);
+	ring->inline_thold = priv->prof->inline_thold;
 
 	tmp = size * sizeof(struct mlx4_en_tx_info);
-	ring->tx_info = vmalloc(tmp);
-	if (!ring->tx_info)
-		return -ENOMEM;
+	ring->tx_info = vmalloc_node(tmp, node);
+	if (!ring->tx_info) {
+		ring->tx_info = vmalloc(tmp);
+		if (!ring->tx_info) {
+			err = -ENOMEM;
+			goto err_ring;
+		}
+	}
 
 	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
 		 ring->tx_info, tmp);
 
-	ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
 	if (!ring->bounce_buf) {
-		err = -ENOMEM;
-		goto err_tx;
+		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+		if (!ring->bounce_buf) {
+			err = -ENOMEM;
+			goto err_info;
+		}
 	}
 	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
 
+	/* Allocate HW buffers on provided NUMA node */
+	set_dev_node(&mdev->dev->pdev->dev, node);
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
 				 2 * PAGE_SIZE);
+	set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
 		goto err_bounce;
@@ -97,21 +108,21 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
 	ring->buf = ring->wqres.buf.direct.buf;
 
-	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d "
-	       "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size,
-	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
+	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
+	       ring, ring->buf, ring->size, ring->buf_size,
+	       (unsigned long long) ring->wqres.buf.direct.map);
 
 	ring->qpn = qpn;
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_map;
 	}
 	ring->qp.event = mlx4_en_sqp_event;
 
-	err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 	if (err) {
-		en_dbg(DRV, priv, "working without blueflame (%d)", err);
+		en_dbg(DRV, priv, "working without blueflame (%d)\n", err);
 		ring->bf.uar = &mdev->priv_uar;
 		ring->bf.uar->map = mdev->uar_map;
 		ring->bf_enabled = false;
@@ -119,7 +130,12 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 		ring->bf_enabled = true;
 
 	ring->hwtstamp_tx_type = priv->hwtstamp_config.tx_type;
+	ring->queue_index = queue_index;
+
+	if (queue_index < priv->num_tx_rings_p_up && cpu_online(queue_index))
+		cpumask_set_cpu(queue_index, &ring->affinity_mask);
 
+	*pring = ring;
 	return 0;
 
 err_map:
@@ -129,16 +145,20 @@ err_hwq_res:
 err_bounce:
 	kfree(ring->bounce_buf);
 	ring->bounce_buf = NULL;
-err_tx:
+err_info:
 	vfree(ring->tx_info);
 	ring->tx_info = NULL;
+err_ring:
+	kfree(ring);
+	*pring = NULL;
 	return err;
 }
 
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
-			     struct mlx4_en_tx_ring *ring)
+			     struct mlx4_en_tx_ring **pring)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_ring *ring = *pring;
 	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
 
 	if (ring->bf_enabled)
@@ -151,6 +171,8 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 	ring->bounce_buf = NULL;
 	vfree(ring->tx_info);
 	ring->tx_info = NULL;
+	kfree(ring);
+	*pring = NULL;
 }
 
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
@@ -178,6 +200,9 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 
 	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
 			       &ring->qp, &ring->qp_state);
+	if (!user_prio && cpu_online(ring->queue_index))
+		netif_set_xps_queue(priv->dev, &ring->affinity_mask,
+				    ring->queue_index);
 
 	return err;
 }
@@ -326,11 +351,12 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 	return cnt;
 }
 
-static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
+static bool mlx4_en_process_tx_cq(struct net_device *dev,
+				 struct mlx4_en_cq *cq)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_cq *mcq = &cq->mcq;
-	struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
+	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
 	struct mlx4_cqe *cqe;
 	u16 index;
 	u16 new_index, ring_index, stamp_index;
@@ -344,9 +370,11 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	u32 bytes = 0;
 	int factor = priv->cqe_factor;
 	u64 timestamp = 0;
+	int done = 0;
+	int budget = priv->tx_work_limit;
 
 	if (!priv->port_up)
-		return;
+		return true;
 
 	index = cons_index & size_mask;
 	cqe = &buf[(index << factor) + factor];
@@ -355,7 +383,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
-			cons_index & size)) {
+			cons_index & size) && (done < budget)) {
 		/*
 		 * make sure we read the CQE after we read the
 		 * ownership bit
@@ -393,7 +421,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 			txbbs_stamp = txbbs_skipped;
 			packets++;
 			bytes += ring->tx_info[ring_index].nr_bytes;
-		} while (ring_index != new_index);
+		} while ((++done < budget) && (ring_index != new_index));
 
 		++cons_index;
 		index = cons_index & size_mask;
@@ -417,8 +445,9 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	 */
	if (netif_tx_queue_stopped(ring->tx_queue) && txbbs_skipped > 0) {
 		netif_tx_wake_queue(ring->tx_queue);
-		priv->port_stats.wake_queue++;
+		ring->wake_queue++;
 	}
+	return done < budget;
 }
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -426,10 +455,29 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq)
 	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
 	struct mlx4_en_priv *priv = netdev_priv(cq->dev);
 
-	mlx4_en_process_tx_cq(cq->dev, cq);
-	mlx4_en_arm_cq(priv, cq);
+	if (priv->port_up)
+		napi_schedule(&cq->napi);
+	else
+		mlx4_en_arm_cq(priv, cq);
 }
 
+/* TX CQ polling - called by NAPI */
+int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
+{
+	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
+	struct net_device *dev = cq->dev;
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	int clean_complete;
+
+	clean_complete = mlx4_en_process_tx_cq(dev, cq);
+	if (!clean_complete)
+		return budget;
+
+	napi_complete(napi);
+	mlx4_en_arm_cq(priv, cq);
+
+	return 0;
+}
 
 static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 						      struct mlx4_en_tx_ring *ring,
@@ -459,7 +507,7 @@ static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
 	return ring->buf + index * TXBB_SIZE;
 }
 
-static int is_inline(struct sk_buff *skb, void **pfrag)
+static int is_inline(int inline_thold, struct sk_buff *skb, void **pfrag)
 {
 	void *ptr;
 
@@ -500,7 +548,10 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
 	int real_size;
 
 	if (skb_is_gso(skb)) {
-		*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		if (skb->encapsulation)
+			*lso_header_size = (skb_inner_transport_header(skb) - skb->data) + inner_tcp_hdrlen(skb);
+		else
+			*lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE +
 			ALIGN(*lso_header_size + 4, DS_SIZE);
 		if (unlikely(*lso_header_size != skb_headlen(skb))) {
@@ -516,7 +567,7 @@ static int get_real_size(struct sk_buff *skb, struct net_device *dev,
 		}
 	} else {
 		*lso_header_size = 0;
-		if (!is_inline(skb, NULL))
+		if (!is_inline(priv->prof->inline_thold, skb, NULL))
 			real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE;
 		else
 			real_size = inline_size(skb);
@@ -532,7 +583,13 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
 	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;
 
 	if (skb->len <= spc) {
-		inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
+		if (likely(skb->len >= MIN_PKT_LEN)) {
+			inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
+		} else {
+			inl->byte_count = cpu_to_be32(1 << 31 | MIN_PKT_LEN);
+			memset(((void *)(inl + 1)) + skb->len, 0,
+			       MIN_PKT_LEN - skb->len);
+		}
 		skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
 		if (skb_shinfo(skb)->nr_frags)
 			memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
@@ -564,7 +621,8 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk
 	}
 }
 
-u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
+u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
+			 void *accel_priv, select_queue_fallback_t fallback)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u16 rings_p_up = priv->num_tx_rings_p_up;
@@ -576,7 +634,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
 	if (vlan_tx_tag_present(skb))
 		up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT;
 
-	return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up;
+	return fallback(dev, skb) % rings_p_up + up * rings_p_up;
 }
 
 static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt)
@@ -622,7 +680,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	tx_ind = skb->queue_mapping;
-	ring = &priv->tx_ring[tx_ind];
+	ring = priv->tx_ring[tx_ind];
 	if (vlan_tx_tag_present(skb))
 		vlan_tag = vlan_tx_tag_get(skb);
 
@@ -631,7 +689,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
 		/* every full Tx ring stops queue */
 		netif_tx_stop_queue(ring->tx_queue);
-		priv->port_stats.queue_stopped++;
+		ring->queue_stopped++;
 
 		/* If queue was emptied after the if, and before the
 		 * stop_queue - need to wake the queue, or else it will remain
@@ -644,7 +702,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (unlikely(((int)(ring->prod - ring->cons)) <=
 			     ring->size - HEADROOM - MAX_DESC_TXBBS)) {
 			netif_tx_wake_queue(ring->tx_queue);
-			priv->port_stats.wake_queue++;
+			ring->wake_queue++;
 		} else {
 			return NETDEV_TX_BUSY;
 		}
@@ -682,11 +740,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_info->data_offset = (void *)data - (void *)tx_desc;
 
 	tx_info->linear = (lso_header_size < skb_headlen(skb) &&
-			   !is_inline(skb, NULL)) ? 1 : 0;
+			   !is_inline(ring->inline_thold, skb, NULL)) ? 1 : 0;
 
 	data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1;
 
-	if (is_inline(skb, &fragptr)) {
+	if (is_inline(ring->inline_thold, skb, &fragptr)) {
 		tx_info->inl = 1;
 	} else {
 		/* Map fragments */
@@ -799,6 +857,14 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_info->inl = 1;
 	}
 
+	if (skb->encapsulation) {
+		struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb);
+		if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP)
+			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP);
+		else
+			op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP);
+	}
+
 	ring->prod += nr_txbb;
 
 	/* If we used a bounce buffer then copy descriptor back into place */
@@ -808,7 +874,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_tx_timestamp(skb);
 
 	if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && !vlan_tx_tag_present(skb)) {
-		*(__be32 *) (&tx_desc->ctrl.vlan_tag) |= cpu_to_be32(ring->doorbell_qpn);
+		tx_desc->ctrl.bf_qpn |= cpu_to_be32(ring->doorbell_qpn);
+
 		op_own |= htonl((bf_index & 0xffff) << 8);
 		/* Ensure new descirptor hits memory
 		* before setting ownership of this descriptor to HW */
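Note: a pattern that recurs in mlx4_en_create_tx_ring() above is "allocate on the requested NUMA node, fall back to any node" so that ring creation still succeeds when the preferred node is out of memory. A minimal sketch of that pattern in isolation is shown below; the names my_entry, my_ring and my_create_ring are hypothetical and not part of the driver.

/* Illustrative sketch only - not part of the patch above. */
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

struct my_entry {
	void *data;
};

struct my_ring {
	struct my_entry *tx_info;	/* large per-descriptor array */
	int size;
};

int my_create_ring(struct my_ring **pring, int size, int node)
{
	struct my_ring *ring;

	/* Prefer the requested NUMA node, but do not fail hard if that
	 * node has no free memory; any node beats no ring at all.
	 */
	ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
	if (!ring) {
		ring = kzalloc(sizeof(*ring), GFP_KERNEL);
		if (!ring)
			return -ENOMEM;
	}

	/* Same two-step allocation for the vmalloc'ed descriptor array. */
	ring->tx_info = vmalloc_node(size * sizeof(*ring->tx_info), node);
	if (!ring->tx_info) {
		ring->tx_info = vmalloc(size * sizeof(*ring->tx_info));
		if (!ring->tx_info) {
			kfree(ring);
			return -ENOMEM;
		}
	}

	ring->size = size;
	*pring = ring;
	return 0;
}

The fallback keeps device bring-up from failing on a NUMA-imbalanced system, at the cost of possibly placing ring state on a remote node.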
