From e6309cff76a6adc0010609d45d34173e524a546d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Jun 2013 07:54:55 +0000 Subject: net/mlx4: use one page fragment per incoming frame mlx4 driver has a suboptimal memory allocation strategy for regular MTU=1500 frames, as it uses two page fragments : One of 512 bytes and one of 1024 bytes. This makes GRO less effective, as each GSO packet contains 8 MSS instead of 16 MSS. Performance of a single TCP flow improves by 25 % with the following patch. Before patch : A:~# netperf -H 192.168.0.2 -Cc MIGRATED TCP STREAM TEST ... Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 13798.47 3.06 4.20 0.436 0.598 After patch : A:~# netperf -H 192.168.0.2 -Cc MIGRATED TCP STREAM TEST ... Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 87380 16384 16384 10.00 17273.80 3.44 4.19 0.391 0.477 Signed-off-by: Eric Dumazet Cc: Amir Vadai Acked-By: Amir Vadai Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b1d7657b2bf..b1f51c1f635 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -98,11 +98,11 @@ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) #define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) -/* Receive fragment sizes; we use at most 4 fragments (for 9600 byte MTU +/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ enum { - FRAG_SZ0 = 512 - NET_IP_ALIGN, - FRAG_SZ1 = 1024, + FRAG_SZ0 = 1536 - NET_IP_ALIGN, + FRAG_SZ1 = 4096, FRAG_SZ2 = 4096, FRAG_SZ3 = MLX4_EN_ALLOC_SIZE }; -- cgit v1.2.3-18-g5258 From c08355fb61fb064d141b8bd156145d326f33022f Mon Sep 17 00:00:00 2001 From: "govindarajulu.v" Date: Mon, 3 Jun 2013 08:07:42 +0000 Subject: mlx4: use __netdev_pick_tx instead of __skb_tx_hash in mlx4_en_select_queue mlx4_en_select_queue() uses __skb_tx_hash to select the transmit queue. XPS settings are ignored by this. Instead, we can use __netdev_pick_tx to select the transmit queue. Compile test only. Signed-off-by: govindarajulu.v Acked-By: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 4e6877a032a..7c492382da0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -544,7 +544,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) if (vlan_tx_tag_present(skb)) up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; - return __skb_tx_hash(dev, skb, rings_p_up) + up * rings_p_up; + return __netdev_pick_tx(dev, skb) % rings_p_up + up * rings_p_up; } static void mlx4_bf_copy(void __iomem *dst, unsigned long *src, unsigned bytecnt) -- cgit v1.2.3-18-g5258 From 948e306d7d645af80ea331b60495710fe4fe12bb Mon Sep 17 00:00:00 2001 From: Rony Efraim Date: Thu, 13 Jun 2013 13:19:11 +0300 Subject: net/mlx4: Add VF link state support Add support to change the link state of VF (vPort) Signed-off-by: Rony Efraim Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 48 ++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 +++++ drivers/net/ethernet/mellanox/mlx4/eq.c | 9 +++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 8 +++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + 5 files changed, 72 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 0e572a52715..ea1e0389635 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -39,6 +39,7 @@ #include #include +#include #include #include @@ -2178,7 +2179,54 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->qos = s_info->default_qos; ivf->tx_rate = s_info->tx_rate; ivf->spoofchk = s_info->spoofchk; + ivf->linkstate = s_info->link_state; return 0; } EXPORT_SYMBOL_GPL(mlx4_get_vf_config); + +int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_vport_state *s_info; + struct mlx4_vport_oper_state *vp_oper; + int slave; + u8 link_stat_event; + + slave = mlx4_get_slave_indx(dev, vf); + if (slave < 0) + return -EINVAL; + + switch (link_state) { + case IFLA_VF_LINK_STATE_AUTO: + /* get current link state */ + if (!priv->sense.do_sense_port[port]) + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + else + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + case IFLA_VF_LINK_STATE_ENABLE: + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_ACTIVE; + break; + + case IFLA_VF_LINK_STATE_DISABLE: + link_stat_event = MLX4_PORT_CHANGE_SUBTYPE_DOWN; + break; + + default: + mlx4_warn(dev, "unknown value for link_state %02x on slave %d port %d\n", + link_state, slave, port); + return -EINVAL; + }; + /* update the admin & oper state on the link state */ + s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + 
vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + s_info->link_state = link_state; + vp_oper->state.link_state = link_state; + + /* send event */ + mlx4_gen_port_state_change_eqe(dev, slave, port, link_stat_event); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 89c47ea84b5..ade276cca0e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2061,6 +2061,13 @@ static int mlx4_en_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_ return mlx4_get_vf_config(mdev->dev, en_priv->port, vf, ivf); } +static int mlx4_en_set_vf_link_state(struct net_device *dev, int vf, int link_state) +{ + struct mlx4_en_priv *en_priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + return mlx4_set_vf_link_state(mdev->dev, en_priv->port, vf, link_state); +} static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2101,6 +2108,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_set_vf_mac = mlx4_en_set_vf_mac, .ndo_set_vf_vlan = mlx4_en_set_vf_vlan, .ndo_set_vf_spoofchk = mlx4_en_set_vf_spoofchk, + .ndo_set_vf_link_state = mlx4_en_set_vf_link_state, .ndo_get_vf_config = mlx4_en_get_vf_config, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx4_en_netpoll, diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 6000342f972..7e042869ef0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -448,6 +448,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) int i; enum slave_port_gen_event gen_event; unsigned long flags; + struct mlx4_vport_state *s_info; while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) { /* @@ -556,7 +557,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct 
mlx4_eq *eq) mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - mlx4_slave_event(dev, i, eqe); + s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); } else { /* IB port */ set_and_calc_slave_port_state(dev, i, port, MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, @@ -580,7 +583,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) for (i = 0; i < dev->num_slaves; i++) { if (i == mlx4_master_func_num(dev)) continue; - mlx4_slave_event(dev, i, eqe); + s_info = &priv->mfunc.master.vf_oper[slave].vport[port].state; + if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state) + mlx4_slave_event(dev, i, eqe); } else /* IB port */ /* port-up event will be sent to a slave when the diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 2c97901c6a6..569bbe3e740 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -830,8 +830,10 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, u8 port_type; u16 short_field; int err; + int admin_link_state; #define MLX4_VF_PORT_NO_LINK_SENSE_MASK 0xE0 +#define MLX4_PORT_LINK_UP_MASK 0x80 #define QUERY_PORT_CUR_MAX_PKEY_OFFSET 0x0c #define QUERY_PORT_CUR_MAX_GID_OFFSET 0x0e @@ -861,6 +863,12 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave, /* set port type to currently operating port type */ port_type |= (dev->caps.port_type[vhcr->in_modifier] & 0x3); + admin_link_state = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.link_state; + if (IFLA_VF_LINK_STATE_ENABLE == admin_link_state) + port_type |= MLX4_PORT_LINK_UP_MASK; + else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state) + port_type &= ~MLX4_PORT_LINK_UP_MASK; + MLX4_PUT(outbox->buf, port_type, QUERY_PORT_SUPPORTED_TYPE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h 
b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index df15bb6631c..75272935a3f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -482,6 +482,7 @@ struct mlx4_vport_state { u8 default_qos; u32 tx_rate; bool spoofchk; + u32 link_state; }; struct mlx4_vf_admin_state { -- cgit v1.2.3-18-g5258 From 9e77a2b837bbd7197da966f0915e8f1ddb2ca850 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 18 Jun 2013 16:18:27 +0300 Subject: net/mlx4_en: Add Low Latency Socket (LLS) support Add basic support for LLS. Signed-off-by: Amir Vadai Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 3 + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 43 ++++++++- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 15 ++- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 121 +++++++++++++++++++++++++ 4 files changed, 178 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 1e6c594d6d0..3e2d5047cdb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -139,6 +139,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, if (!cq->is_tx) { netif_napi_add(cq->dev, &cq->napi, mlx4_en_poll_rx_cq, 64); + napi_hash_add(&cq->napi); napi_enable(&cq->napi); } @@ -162,6 +163,8 @@ void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { if (!cq->is_tx) { napi_disable(&cq->napi); + napi_hash_del(&cq->napi); + synchronize_rcu(); netif_napi_del(&cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ade276cca0e..ab9ec91d1f7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -67,6 
+68,30 @@ int mlx4_en_setup_tc(struct net_device *dev, u8 up) return 0; } +#ifdef CONFIG_NET_LL_RX_POLL +/* must be called with local_bh_disable()d */ +static int mlx4_en_low_latency_recv(struct napi_struct *napi) +{ + struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rx_ring *rx_ring = &priv->rx_ring[cq->ring]; + int done; + + if (!priv->port_up) + return LL_FLUSH_FAILED; + + if (!mlx4_en_cq_lock_poll(cq)) + return LL_FLUSH_BUSY; + + done = mlx4_en_process_rx_cq(dev, cq, 4); + + mlx4_en_cq_unlock_poll(cq); + + return done; +} +#endif /* CONFIG_NET_LL_RX_POLL */ + #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { @@ -1445,6 +1470,8 @@ int mlx4_en_start_port(struct net_device *dev) for (i = 0; i < priv->rx_ring_num; i++) { cq = &priv->rx_cq[i]; + mlx4_en_cq_init_lock(cq); + err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); @@ -1694,10 +1721,19 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { + struct mlx4_en_cq *cq = &priv->rx_cq[i]; + + local_bh_disable(); + while (!mlx4_en_cq_lock_napi(cq)) { + pr_info("CQ %d locked\n", i); + mdelay(1); + } + local_bh_enable(); + mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); - while (test_bit(NAPI_STATE_SCHED, &priv->rx_cq[i].napi.state)) + while (test_bit(NAPI_STATE_SCHED, &cq->napi.state)) msleep(1); - mlx4_en_deactivate_cq(priv, &priv->rx_cq[i]); + mlx4_en_deactivate_cq(priv, cq); } /* close port*/ @@ -2090,6 +2126,9 @@ static const struct net_device_ops mlx4_netdev_ops = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif +#ifdef CONFIG_NET_LL_RX_POLL + .ndo_ll_poll = mlx4_en_low_latency_recv, +#endif }; static const struct net_device_ops mlx4_netdev_ops_master = { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c 
index 02aee1ebd20..9c57581b021 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -31,6 +31,7 @@ * */ +#include #include #include #include @@ -656,8 +657,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * - DIX Ethernet (type interpretation) * - TCP/IP (v4) * - without IP options - * - not an IP fragment */ - if (dev->features & NETIF_F_GRO) { + * - not an IP fragment + * - no LLS polling in progress + */ + if (!mlx4_en_cq_ll_polling(cq) && + (dev->features & NETIF_F_GRO)) { struct sk_buff *gro_skb = napi_get_frags(&cq->napi); if (!gro_skb) goto next; @@ -737,6 +741,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud timestamp); } + skb_mark_ll(skb, &cq->napi); + /* Push it up the stack */ netif_receive_skb(skb); @@ -781,8 +787,13 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) struct mlx4_en_priv *priv = netdev_priv(dev); int done; + if (!mlx4_en_cq_lock_napi(cq)) + return budget; + done = mlx4_en_process_rx_cq(dev, cq, budget); + mlx4_en_cq_unlock_napi(cq); + /* If we used up all the quota - we're probably not done yet... 
*/ if (done == budget) INC_PERF_COUNTER(priv->pstats.napi_quota); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index b1f51c1f635..11c862e4e69 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -310,6 +310,19 @@ struct mlx4_en_cq { u16 moder_cnt; struct mlx4_cqe *buf; #define MLX4_EN_OPCODE_ERROR 0x1e + +#ifdef CONFIG_NET_LL_RX_POLL + unsigned int state; +#define MLX4_EN_CQ_STATE_IDLE 0 +#define MLX4_EN_CQ_STATE_NAPI 1 /* NAPI owns this CQ */ +#define MLX4_EN_CQ_STATE_POLL 2 /* poll owns this CQ */ +#define MLX4_CQ_LOCKED (MLX4_EN_CQ_STATE_NAPI | MLX4_EN_CQ_STATE_POLL) +#define MLX4_EN_CQ_STATE_NAPI_YIELD 4 /* NAPI yielded this CQ */ +#define MLX4_EN_CQ_STATE_POLL_YIELD 8 /* poll yielded this CQ */ +#define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD) +#define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) + spinlock_t poll_lock; /* protects from LLS/napi conflicts */ +#endif /* CONFIG_NET_LL_RX_POLL */ }; struct mlx4_en_port_profile { @@ -562,6 +575,114 @@ struct mlx4_mac_entry { struct rcu_head rcu; }; +#ifdef CONFIG_NET_LL_RX_POLL +static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) +{ + spin_lock_init(&cq->poll_lock); + cq->state = MLX4_EN_CQ_STATE_IDLE; +} + +/* called from the device poll rutine to get ownership of a cq */ +static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) +{ + int rc = true; + spin_lock(&cq->poll_lock); + if (cq->state & MLX4_CQ_LOCKED) { + WARN_ON(cq->state & MLX4_EN_CQ_STATE_NAPI); + cq->state |= MLX4_EN_CQ_STATE_NAPI_YIELD; + rc = false; + } else + /* we don't care if someone yielded */ + cq->state = MLX4_EN_CQ_STATE_NAPI; + spin_unlock(&cq->poll_lock); + return rc; +} + +/* returns true is someone tried to get the cq while napi had it */ +static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) +{ + int rc = false; + spin_lock(&cq->poll_lock); + 
WARN_ON(cq->state & (MLX4_EN_CQ_STATE_POLL | + MLX4_EN_CQ_STATE_NAPI_YIELD)); + + if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) + rc = true; + cq->state = MLX4_EN_CQ_STATE_IDLE; + spin_unlock(&cq->poll_lock); + return rc; +} + +/* called from mlx4_en_low_latency_poll() */ +static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) +{ + int rc = true; + spin_lock_bh(&cq->poll_lock); + if ((cq->state & MLX4_CQ_LOCKED)) { + struct net_device *dev = cq->dev; + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_rx_ring *rx_ring = &priv->rx_ring[cq->ring]; + + cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD; + rc = false; + } else + /* preserve yield marks */ + cq->state |= MLX4_EN_CQ_STATE_POLL; + spin_unlock_bh(&cq->poll_lock); + return rc; +} + +/* returns true if someone tried to get the cq while it was locked */ +static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) +{ + int rc = false; + spin_lock_bh(&cq->poll_lock); + WARN_ON(cq->state & (MLX4_EN_CQ_STATE_NAPI)); + + if (cq->state & MLX4_EN_CQ_STATE_POLL_YIELD) + rc = true; + cq->state = MLX4_EN_CQ_STATE_IDLE; + spin_unlock_bh(&cq->poll_lock); + return rc; +} + +/* true if a socket is polling, even if it did not get the lock */ +static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) +{ + WARN_ON(!(cq->state & MLX4_CQ_LOCKED)); + return cq->state & CQ_USER_PEND; +} +#else +static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq) +{ +} + +static inline bool mlx4_en_cq_lock_napi(struct mlx4_en_cq *cq) +{ + return true; +} + +static inline bool mlx4_en_cq_unlock_napi(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_unlock_poll(struct mlx4_en_cq *cq) +{ + return false; +} + +static inline bool mlx4_en_cq_ll_polling(struct mlx4_en_cq *cq) +{ + return false; +} +#endif /* CONFIG_NET_LL_RX_POLL */ + #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63) void 
mlx4_en_update_loopback_state(struct net_device *dev, -- cgit v1.2.3-18-g5258 From 8501841a4483e678ebd1b7872019621244d0098a Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Tue, 18 Jun 2013 16:18:28 +0300 Subject: net/mlx4_en: Low Latency recv statistics Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 20 +++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 6 ++++++ 3 files changed, 29 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c9e6b62dd00..727874f575c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -222,7 +222,12 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: return (priv->stats_bitmap ? 
bit_count : NUM_ALL_STATS) + - (priv->tx_ring_num + priv->rx_ring_num) * 2; + (priv->tx_ring_num * 2) + +#ifdef CONFIG_NET_LL_RX_POLL + (priv->rx_ring_num * 5); +#else + (priv->rx_ring_num * 2); +#endif case ETH_SS_TEST: return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2; @@ -271,6 +276,11 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, for (i = 0; i < priv->rx_ring_num; i++) { data[index++] = priv->rx_ring[i].packets; data[index++] = priv->rx_ring[i].bytes; +#ifdef CONFIG_NET_LL_RX_POLL + data[index++] = priv->rx_ring[i].yields; + data[index++] = priv->rx_ring[i].misses; + data[index++] = priv->rx_ring[i].cleaned; +#endif } spin_unlock_bh(&priv->stats_lock); @@ -334,6 +344,14 @@ static void mlx4_en_get_strings(struct net_device *dev, "rx%d_packets", i); sprintf(data + (index++) * ETH_GSTRING_LEN, "rx%d_bytes", i); +#ifdef CONFIG_NET_LL_RX_POLL + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_napi_yield", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_misses", i); + sprintf(data + (index++) * ETH_GSTRING_LEN, + "rx%d_cleaned", i); +#endif } break; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ab9ec91d1f7..7299ada876c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -85,6 +85,10 @@ static int mlx4_en_low_latency_recv(struct napi_struct *napi) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); + if (likely(done)) + rx_ring->cleaned += done; + else + rx_ring->misses++; mlx4_en_cq_unlock_poll(cq); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 11c862e4e69..57192a8f1d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -290,6 +290,11 @@ struct mlx4_en_rx_ring { void *rx_info; unsigned long bytes; unsigned long packets; 
+#ifdef CONFIG_NET_LL_RX_POLL + unsigned long yields; + unsigned long misses; + unsigned long cleaned; +#endif unsigned long csum_ok; unsigned long csum_none; int hwtstamp_rx_filter; @@ -625,6 +630,7 @@ static inline bool mlx4_en_cq_lock_poll(struct mlx4_en_cq *cq) cq->state |= MLX4_EN_CQ_STATE_POLL_YIELD; rc = false; + rx_ring->yields++; } else /* preserve yield marks */ cq->state |= MLX4_EN_CQ_STATE_POLL; -- cgit v1.2.3-18-g5258 From 51151a16a60f0a886a0b1e4a0697001198af50c4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Jun 2013 08:17:56 -0700 Subject: mlx4: allow order-0 memory allocations in RX path Signed-off-by: Eric Dumazet mlx4 exclusively uses order-2 allocations in RX path, which are likely to fail under memory pressure. We therefore drop frames more than needed. This patch tries order-3, order-2, order-1 and finally order-0 allocations to keep good performance, yet allow allocations if/when memory gets fragmented. By using larger pages, and avoiding unnecessary get_page()/put_page() on compound pages, this patch improves performance as well, lowering false sharing on struct page. Also use GFP_KERNEL allocations in initialization path, as allocating 12 MB (390 order-3 pages) can easily fail with GFP_ATOMIC. Signed-off-by: Eric Dumazet Cc: Amir Vadai Acked-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 169 ++++++++++++++------------- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 12 +- 2 files changed, 95 insertions(+), 86 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 9c57581b021..76997b93fdf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -43,40 +43,64 @@ #include "mlx4_en.h" +static int mlx4_alloc_pages(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *page_alloc, + const struct mlx4_en_frag_info *frag_info, + gfp_t _gfp) +{ + int order; + struct page *page; + dma_addr_t dma; + + for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) { + gfp_t gfp = _gfp; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + page = alloc_pages(gfp, order); + if (likely(page)) + break; + if (--order < 0 || + ((PAGE_SIZE << order) < frag_info->frag_size)) + return -ENOMEM; + } + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, + PCI_DMA_FROMDEVICE); + if (dma_mapping_error(priv->ddev, dma)) { + put_page(page); + return -ENOMEM; + } + page_alloc->size = PAGE_SIZE << order; + page_alloc->page = page; + page_alloc->dma = dma; + page_alloc->offset = frag_info->frag_align; + /* Not doing get_page() for each frag is a big win + * on asymetric workloads. 
+ */ + atomic_set(&page->_count, page_alloc->size / frag_info->frag_stride); + return 0; +} + static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, - struct mlx4_en_rx_alloc *ring_alloc) + struct mlx4_en_rx_alloc *ring_alloc, + gfp_t gfp) { struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; - struct mlx4_en_frag_info *frag_info; + const struct mlx4_en_frag_info *frag_info; struct page *page; dma_addr_t dma; int i; for (i = 0; i < priv->num_frags; i++) { frag_info = &priv->frag_info[i]; - if (ring_alloc[i].offset == frag_info->last_offset) { - page = alloc_pages(GFP_ATOMIC | __GFP_COMP, - MLX4_EN_ALLOC_ORDER); - if (!page) - goto out; - dma = dma_map_page(priv->ddev, page, 0, - MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(priv->ddev, dma)) { - put_page(page); - goto out; - } - page_alloc[i].page = page; - page_alloc[i].dma = dma; - page_alloc[i].offset = frag_info->frag_align; - } else { - page_alloc[i].page = ring_alloc[i].page; - get_page(ring_alloc[i].page); - page_alloc[i].dma = ring_alloc[i].dma; - page_alloc[i].offset = ring_alloc[i].offset + - frag_info->frag_stride; - } + page_alloc[i] = ring_alloc[i]; + page_alloc[i].offset += frag_info->frag_stride; + if (page_alloc[i].offset + frag_info->frag_stride <= ring_alloc[i].size) + continue; + if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + goto out; } for (i = 0; i < priv->num_frags; i++) { @@ -88,14 +112,16 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, return 0; - out: while (i--) { frag_info = &priv->frag_info[i]; - if (ring_alloc[i].offset == frag_info->last_offset) + if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); - put_page(page_alloc[i].page); + page_alloc[i].size, PCI_DMA_FROMDEVICE); + page = page_alloc[i].page; + atomic_set(&page->_count, 1); + put_page(page); + } } return -ENOMEM; } @@ 
-104,12 +130,12 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frags, int i) { - struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - if (frags[i].offset == frag_info->last_offset) { - dma_unmap_page(priv->ddev, frags[i].dma, MLX4_EN_ALLOC_SIZE, + if (frags[i].offset + frag_info->frag_stride > frags[i].size) + dma_unmap_page(priv->ddev, frags[i].dma, frags[i].size, PCI_DMA_FROMDEVICE); - } + if (frags[i].page) put_page(frags[i].page); } @@ -117,35 +143,28 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { - struct mlx4_en_rx_alloc *page_alloc; int i; + struct mlx4_en_rx_alloc *page_alloc; for (i = 0; i < priv->num_frags; i++) { - page_alloc = &ring->page_alloc[i]; - page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, - MLX4_EN_ALLOC_ORDER); - if (!page_alloc->page) - goto out; + const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - page_alloc->dma = dma_map_page(priv->ddev, page_alloc->page, 0, - MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(priv->ddev, page_alloc->dma)) { - put_page(page_alloc->page); - page_alloc->page = NULL; + if (mlx4_alloc_pages(priv, &ring->page_alloc[i], + frag_info, GFP_KERNEL)) goto out; - } - page_alloc->offset = priv->frag_info[i].frag_align; - en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n", - i, page_alloc->page); } return 0; out: while (i--) { + struct page *page; + page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); - put_page(page_alloc->page); + page_alloc->size, PCI_DMA_FROMDEVICE); + page = page_alloc->page; + atomic_set(&page->_count, 1); + put_page(page); page_alloc->page = NULL; } return -ENOMEM; @@ -158,13 +177,18 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, int i; for (i = 
0; i < priv->num_frags; i++) { + const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + page_alloc = &ring->page_alloc[i]; en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - MLX4_EN_ALLOC_SIZE, PCI_DMA_FROMDEVICE); - put_page(page_alloc->page); + page_alloc->size, PCI_DMA_FROMDEVICE); + while (page_alloc->offset + frag_info->frag_stride < page_alloc->size) { + put_page(page_alloc->page); + page_alloc->offset += frag_info->frag_stride; + } page_alloc->page = NULL; } } @@ -195,13 +219,14 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, } static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, int index) + struct mlx4_en_rx_ring *ring, int index, + gfp_t gfp) { struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc); + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) @@ -235,7 +260,8 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) ring = &priv->rx_ring[ring_ind]; if (mlx4_en_prepare_rx_desc(priv, ring, - ring->actual_size)) { + ring->actual_size, + GFP_KERNEL)) { if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) { en_err(priv, "Failed to allocate " "enough rx buffers\n"); @@ -450,11 +476,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, DMA_FROM_DEVICE); /* Save page reference in skb */ - get_page(frags[nr].page); __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page); skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size); skb_frags_rx[nr].page_offset = frags[nr].offset; skb->truesize += frag_info->frag_stride; + frags[nr].page = NULL; } /* Adjust size of last fragment to match actual length */ if (nr > 0) @@ -547,7 +573,7 @@ 
static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, int index = ring->prod & ring->size_mask; while ((u32) (ring->prod - ring->cons) < ring->actual_size) { - if (mlx4_en_prepare_rx_desc(priv, ring, index)) + if (mlx4_en_prepare_rx_desc(priv, ring, index, GFP_ATOMIC)) break; ring->prod++; index = ring->prod & ring->size_mask; @@ -805,21 +831,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) return done; } - -/* Calculate the last offset position that accommodates a full fragment - * (assuming fagment size = stride-align) */ -static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align) -{ - u16 res = MLX4_EN_ALLOC_SIZE % stride; - u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align; - - en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d " - "res:%d offset:%d\n", stride, align, res, offset); - return offset; -} - - -static int frag_sizes[] = { +static const int frag_sizes[] = { FRAG_SZ0, FRAG_SZ1, FRAG_SZ2, @@ -847,9 +859,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) priv->frag_info[i].frag_stride = ALIGN(frag_sizes[i], SMP_CACHE_BYTES); } - priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset( - priv, priv->frag_info[i].frag_stride, - priv->frag_info[i].frag_align); buf_size += priv->frag_info[i].frag_size; i++; } @@ -861,13 +870,13 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " "num_frags:%d):\n", eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { - en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d align:%d " - "stride:%d last_offset:%d\n", i, - priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size, - priv->frag_info[i].frag_align, - priv->frag_info[i].frag_stride, - priv->frag_info[i].last_offset); + en_err(priv, + " frag:%d - size:%d prefix:%d align:%d stride:%d\n", + i, + priv->frag_info[i].frag_size, + priv->frag_info[i].frag_prefix_size, + priv->frag_info[i].frag_align, + 
priv->frag_info[i].frag_stride); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 57192a8f1d5..35fb60e2320 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -96,7 +96,8 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_ORDER get_order(MLX4_EN_ALLOC_SIZE) + +#define MLX4_EN_ALLOC_PREFER_ORDER PAGE_ALLOC_COSTLY_ORDER /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU * and 4K allocations) */ @@ -234,9 +235,10 @@ struct mlx4_en_tx_desc { #define MLX4_EN_CX3_HIGH_ID 0x1005 struct mlx4_en_rx_alloc { - struct page *page; - dma_addr_t dma; - u16 offset; + struct page *page; + dma_addr_t dma; + u32 offset; + u32 size; }; struct mlx4_en_tx_ring { @@ -439,8 +441,6 @@ struct mlx4_en_frag_info { u16 frag_prefix_size; u16 frag_stride; u16 frag_align; - u16 last_offset; - }; #ifdef CONFIG_MLX4_EN_DCB -- cgit v1.2.3-18-g5258 From 8850494a33cc67bbf31d2e3ce630d0f4e14efa56 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 25 Jun 2013 12:09:29 +0300 Subject: net/mlx4_en: Fix resource leak in error flow Wrong condition was used when calling iounmap. Signed-off-by: Dotan Barak Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index a5c9df07a7d..a071cda2dd0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -310,7 +310,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) err_mr: (void) mlx4_mr_free(dev, &mdev->mr); err_map: - if (!mdev->uar_map) + if (mdev->uar_map) iounmap(mdev->uar_map); err_uar: mlx4_uar_free(dev, &mdev->priv_uar); -- cgit v1.2.3-18-g5258 From 6123db2ec529f1d1865298388e129a6aad2fae17 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 25 Jun 2013 12:09:30 +0300 Subject: net/mlx4_en: Do not query stats when device port is down There are no counters allocated to the eth device when the port is down, so this query is meaningless at that time. It also leads to querying incorrect counters (since the counter_index is not valid when the device port is down). Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7299ada876c..c0b02d7bcc2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1375,12 +1375,13 @@ static void mlx4_en_do_get_stats(struct work_struct *work) mutex_lock(&mdev->state_lock); if (mdev->device_up) { - err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); - if (err) - en_dbg(HW, priv, "Could not update stats\n"); + if (priv->port_up) { + err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); + if (err) + en_dbg(HW, priv, "Could not update stats\n"); - if (priv->port_up) mlx4_en_auto_moderation(priv); + } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } -- cgit v1.2.3-18-g5258 From 4801ae70d80dc815997e3cb6bca1dfe36d1594fd Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 25 Jun 2013 12:09:31 +0300 Subject: net/mlx4_en: Move register_netdev() to the end of initialization function To avoid a race between the open function and everything that happens after register_netdev() move it to be the last operation called. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c0b02d7bcc2..1f0f8170d79 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2323,6 +2323,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, mdev->pndev[port] = dev; netif_carrier_off(dev); + mlx4_en_set_default_moderation(priv); + err = register_netdev(dev); if (err) { en_err(priv, "Netdev registration failed for port %d\n", port); @@ -2354,7 +2356,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, en_err(priv, "Failed Initializing port\n"); goto out; } - mlx4_en_set_default_moderation(priv); queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) -- cgit v1.2.3-18-g5258 From 9e19b54554fac0a1c1c92f0a10b29d216b84a470 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 25 Jun 2013 12:09:32 +0300 Subject: net/mlx4_en: Change log level from error to debug for vlan related messages The port vlan table size is 126 (used for IBoE), so after 126 entries there is no space left; the user needs to see this only as a debug print and not as an error. Signed-off-by: Aviad Yehezkel Reviewed-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 1f0f8170d79..f256a734e02 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -405,7 +405,7 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, en_err(priv, "Failed configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) - en_err(priv, "failed adding vlan %d\n", vid); + en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); return 0; @@ -428,7 +428,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, if (!mlx4_find_cached_vlan(mdev->dev, priv->port, vid, &idx)) mlx4_unregister_vlan(mdev->dev, priv->port, idx); else - en_err(priv, "could not find vid %d in cache\n", vid); + en_dbg(HW, priv, "could not find vid %d in cache\n", vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); -- cgit v1.2.3-18-g5258 From 0cc5c8bf11852dec3225fda2f53a599243095d23 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Tue, 25 Jun 2013 12:09:33 +0300 Subject: net/mlx4_en: Fix a race between napi poll function and RX ring cleanup The RX rings were cleaned while there was still possible RX traffic completion handling. Change the sequence of events so that the port is closed and the QPs are being stopped before RX cleanup. Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f256a734e02..f1dcddcb805 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1635,6 +1635,9 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) return; } + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + /* Synchronize with tx routine */ netif_tx_lock_bh(dev); if (detach) @@ -1735,14 +1738,11 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) } local_bh_enable(); - mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); while (test_bit(NAPI_STATE_SCHED, &cq->napi.state)) msleep(1); + mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); } - - /* close port*/ - mlx4_CLOSE_PORT(mdev->dev, priv->port); } static void mlx4_en_restart(struct work_struct *work) -- cgit v1.2.3-18-g5258 From b944ebec787be9396978b0f7773f99e751330196 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Tue, 25 Jun 2013 12:09:34 +0300 Subject: net/mlx4_en: Add prints when TX timeout occurs Print a warning when a TX timeout is detected Signed-off-by: Yevgeny Petrilin Signed-off-by: Eugenia Emantayev Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index f1dcddcb805..caf20477056 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1236,10 +1236,19 @@ static void mlx4_en_tx_timeout(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + int i; if (netif_msg_timer(priv)) en_warn(priv, "Tx timeout called on port:%d\n", priv->port); + for (i = 0; i < priv->tx_ring_num; i++) { + if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) + continue; + en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n", + i, priv->tx_ring[i].qpn, priv->tx_ring[i].cqn, + priv->tx_ring[i].cons, priv->tx_ring[i].prod); + } + priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); -- cgit v1.2.3-18-g5258 From 42f1e9020e22d64d18292c6cb9182f4beeb43cad Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 25 Jun 2013 12:09:35 +0300 Subject: net/mlx4_en: Remove an unnecessary test Since this variable is now part of a structure and not allocated dynamically, this test is irrelevant now. Signed-off-by: Dotan Barak Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 0f91222ea3d..9d4a1ea030d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -207,9 +207,6 @@ static int mlx4_en_dcbnl_ieee_getmaxrate(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); int i; - if (!priv->maxrate) - return -EINVAL; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) maxrate->tc_maxrate[i] = priv->maxrate[i] * MLX4_RATELIMIT_UNITS_IN_KB; -- cgit v1.2.3-18-g5258 From 618fad954b9becf095f1b52391af29f743ff4662 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 25 Jun 2013 12:09:36 +0300 Subject: net/mlx4_core: Replace sscanf() with kstrtoint() It is not safe to use sscanf. Signed-off-by: Dotan Barak Signed-off-by: Vladimir Sokolovsky Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f4a26039e8..81e4529092a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -839,11 +839,11 @@ static ssize_t set_port_ib_mtu(struct device *dev, return -EINVAL; } - err = sscanf(buf, "%d", &mtu); - if (err > 0) + err = kstrtoint(buf, 0, &mtu); + if (!err) ibta_mtu = int_to_ibta_mtu(mtu); - if (err <= 0 || ibta_mtu < 0) { + if (err || ibta_mtu < 0) { mlx4_err(mdev, "%s is invalid IBTA mtu\n", buf); return -EINVAL; } -- cgit v1.2.3-18-g5258 From 674925edb466b027d7c61993ebe3250fb8989ee0 Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Tue, 25 Jun 2013 12:09:37 +0300 Subject: net/mlx4_core: Add warning in case of command timeouts Print a warning when a command times out, to help debug future failures. Signed-off-by: Dotan Barak Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ea1e0389635..df04c8206eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -257,6 +257,8 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { + mlx4_warn(dev, "communication channel command 0x%x timed out\n", + op); err = -EBUSY; goto out; } @@ -486,6 +488,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } if (cmd_pending(dev)) { + mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", + op); err = -ETIMEDOUT; goto out; } @@ -549,6 +553,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { + mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", + op); err = -EBUSY; goto out; } -- cgit v1.2.3-18-g5258 From 30e514a71753ac3fd0ddea1411d5602ccbe14acf Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 25 Jun 2013 12:09:38 +0300 Subject: net/mlx4_core: Fail device init if num_vfs is negative Should not allow negative num_vfs Signed-off-by: Jack Morgenstein Signed-off-by: Vladimir Sokolovsky Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 81e4529092a..56160a2bb57 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2077,6 +2077,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) num_vfs, MLX4_MAX_NUM_VF); return -EINVAL; } + + if (num_vfs < 0) { + pr_err("num_vfs module parameter cannot be negative\n"); + return -EINVAL; + } /* * Check for BARs. */ -- cgit v1.2.3-18-g5258 From b01978cacfd7e3a4ca703b0e48f2e18de8865df5 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Jun 2013 19:05:21 +0300 Subject: net/mlx4_core: Dynamic VST to VST vlan/qos changes Within VST mode, enable modifying the vlan and/or qos for a VF without requiring unbind/rebind. This requires firmware which supports the UPDATE_QP command. (If the command is not available, we fall back to requiring unbind/bind to activate these changes). To avoid race conditions with modify-qp on QPs that are affected by update-qp, this operation is performed on the comm_wq. If the update operation succeeds for all the necessary QPs, a vlan_unregister is performed for the abandoned vlan id. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 122 ++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/fw.c | 5 +- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 20 +++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 145 ++++++++++++++++++++- 4 files changed, 282 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet/mellanox/mlx4') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index df04c8206eb..7b927891dc3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -112,6 +112,14 @@ enum { GO_BIT_TIMEOUT_MSECS = 10000 }; +enum mlx4_vlan_transition { + MLX4_VLAN_TRANSITION_VST_VST = 0, + MLX4_VLAN_TRANSITION_VST_VGT = 1, + MLX4_VLAN_TRANSITION_VGT_VST = 2, + MLX4_VLAN_TRANSITION_VGT_VGT = 3, +}; + + struct mlx4_cmd_context { struct completion done; int result; @@ -792,6 +800,15 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } +int MLX4_CMD_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + return -EPERM; +} + int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1225,6 +1242,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_GEN_QP_wrapper }, + { + .opcode = MLX4_CMD_UPDATE_QP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = MLX4_CMD_UPDATE_QP_wrapper + }, { .opcode = MLX4_CMD_CONF_SPECIAL_QP, .has_inbox = false, @@ -1495,6 +1521,72 @@ out: return ret; } + +int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, + int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + struct mlx4_vf_immed_vlan_work *work; + int err; + int 
admin_vlan_ix = NO_INDX; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + if (vp_oper->state.default_vlan != vp_admin->default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &admin_vlan_ix); + if (err) { + mlx4_warn((&priv->dev), + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_VLAN; + mlx4_dbg((&(priv->dev)), + "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_admin->default_vlan), + admin_vlan_ix, slave, port); + } + + /* save original vlan ix and vlan id */ + work->orig_vlan_id = vp_oper->state.default_vlan; + work->orig_vlan_ix = vp_oper->vlan_idx; + + /* handle new qos */ + if (vp_oper->state.default_qos != vp_admin->default_qos) + work->flags |= MLX4_VF_IMMED_VLAN_FLAG_QOS; + + if (work->flags & MLX4_VF_IMMED_VLAN_FLAG_VLAN) + vp_oper->vlan_idx = admin_vlan_ix; + + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + /* iterate over QPs owned by this slave, using UPDATE_QP */ + work->port = port; + work->slave = slave; + work->qos = vp_oper->state.default_qos; + work->vlan_id = vp_oper->state.default_vlan; + work->vlan_ix = vp_oper->vlan_idx; + work->priv = priv; + INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); + queue_work(priv->mfunc.master.comm_wq, &work->work); + + return 0; +} + + static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) { int port, err; @@ -2109,11 +2201,18 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) } EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); +static int calculate_transition(u16 oper_vlan, u16 admin_vlan) +{ + return (2 * (oper_vlan == 
MLX4_VGT) + (admin_vlan == MLX4_VGT)); +} + int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) { struct mlx4_priv *priv = mlx4_priv(dev); - struct mlx4_vport_state *s_info; + struct mlx4_vport_oper_state *vf_oper; + struct mlx4_vport_state *vf_admin; int slave; + enum mlx4_vlan_transition vlan_trans; if ((!mlx4_is_master(dev)) || !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_VLAN_CONTROL)) @@ -2126,12 +2225,25 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) if (slave < 0) return -EINVAL; - s_info = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + if ((0 == vlan) && (0 == qos)) - s_info->default_vlan = MLX4_VGT; + vf_admin->default_vlan = MLX4_VGT; else - s_info->default_vlan = vlan; - s_info->default_qos = qos; + vf_admin->default_vlan = vlan; + vf_admin->default_qos = qos; + + vlan_trans = calculate_transition(vf_oper->state.default_vlan, + vf_admin->default_vlan); + + if (priv->mfunc.master.slave_state[slave].active && + dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP && + vlan_trans == MLX4_VLAN_TRANSITION_VST_VST) { + mlx4_info(dev, "updating vf %d port %d config params immediately\n", + vf, port); + mlx4_master_immediate_activate_vlan_qos(priv, slave, port); + } return 0; } EXPORT_SYMBOL_GPL(mlx4_set_vf_vlan); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 569bbe3e740..8873d6802c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -133,7 +133,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [4] = "Automatic MAC reassignment support", [5] = "Time stamping support", [6] = "VST (control vlan insertion/stripping) support", - [7] = "FSM (MAC anti-spoofing) support" + [7] = "FSM (MAC anti-spoofing) support", + [8] = "Dynamic QP updates support" }; int i; @@ -659,6 
+660,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); + if (field32 & (1 << 16)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; if (field32 & (1 << 26)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL; if (field32 & (1 << 20)) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 75272935a3f..5abcb6501e3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -571,6 +571,24 @@ struct mlx4_cmd { u8 comm_toggle; }; +enum { + MLX4_VF_IMMED_VLAN_FLAG_VLAN = 1 << 0, + MLX4_VF_IMMED_VLAN_FLAG_QOS = 1 << 1, +}; +struct mlx4_vf_immed_vlan_work { + struct work_struct work; + struct mlx4_priv *priv; + int flags; + int slave; + int vlan_ix; + int orig_vlan_ix; + u8 port; + u8 qos; + u16 vlan_id; + u16 orig_vlan_id; +}; + + struct mlx4_uar_table { struct mlx4_bitmap bitmap; }; @@ -1218,4 +1236,6 @@ static inline spinlock_t *mlx4_tlock(struct mlx4_dev *dev) #define NOT_MASKED_PD_BITS 17 +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work); + #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1157f028a90..46323a20060 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -101,6 +101,8 @@ struct res_qp { spinlock_t mcg_spl; int local_qpn; atomic_t ref_count; + u32 qpc_flags; + u8 sched_queue; }; enum res_mtt_states { @@ -355,7 +357,7 @@ static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, - u8 slave) + u8 slave, u32 qpn) { struct mlx4_qp_context *qpc = inbox->buf + 8; struct mlx4_vport_oper_state *vp_oper; @@ -369,9 +371,17 @@ static int 
update_vport_qp_param(struct mlx4_dev *dev, if (MLX4_VGT != vp_oper->state.default_vlan) { qp_type = (be32_to_cpu(qpc->flags) >> 16) & 0xff; - if (MLX4_QP_ST_RC == qp_type) + if (MLX4_QP_ST_RC == qp_type || + (MLX4_QP_ST_UD == qp_type && + !mlx4_is_qp_reserved(dev, qpn))) return -EINVAL; + /* the reserved QPs (special, proxy, tunnel) + * do not operate over vlans + */ + if (mlx4_is_qp_reserved(dev, qpn)) + return 0; + /* force strip vlan by clear vsd */ qpc->param3 &= ~cpu_to_be32(MLX4_STRIP_VLAN); if (0 != vp_oper->state.default_vlan) { @@ -2114,6 +2124,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; qp->local_qpn = local_qpn; + qp->sched_queue = 0; + qp->qpc_flags = be32_to_cpu(qpc->flags); err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); if (err) @@ -2836,6 +2848,9 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, { int err; struct mlx4_qp_context *qpc = inbox->buf + 8; + int qpn = vhcr->in_modifier & 0x7fffff; + struct res_qp *qp; + u8 orig_sched_queue; err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); if (err) @@ -2844,11 +2859,30 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, update_pkey_index(dev, slave, inbox); update_gid(dev, inbox, (u8)slave); adjust_proxy_tun_qkey(dev, vhcr, qpc); - err = update_vport_qp_param(dev, inbox, slave); + orig_sched_queue = qpc->pri_path.sched_queue; + err = update_vport_qp_param(dev, inbox, slave, qpn); if (err) return err; - return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + err = get_res(dev, slave, qpn, RES_QP, &qp); + if (err) + return err; + if (qp->com.from_state != RES_QP_HW) { + err = -EBUSY; + goto out; + } + + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +out: + /* if no error, save sched queue value passed in by VF. This is + * essentially the QOS value provided by the VF. 
This will be useful + * if we allow dynamic changes from VST back to VGT + */ + if (!err) + qp->sched_queue = orig_sched_queue; + + put_res(dev, slave, qpn, RES_QP); + return err; } int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, @@ -3932,3 +3966,106 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave) rem_slave_xrcdns(dev, slave); mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex); } + +void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) +{ + struct mlx4_vf_immed_vlan_work *work = + container_of(_work, struct mlx4_vf_immed_vlan_work, work); + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_update_qp_context *upd_context; + struct mlx4_dev *dev = &work->priv->dev; + struct mlx4_resource_tracker *tracker = + &work->priv->mfunc.master.res_tracker; + struct list_head *qp_list = + &tracker->slave_list[work->slave].res_list[RES_QP]; + struct res_qp *qp; + struct res_qp *tmp; + u64 qp_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_1P) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_TX_BLOCK_TAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_UNTAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_1P) | + (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_RX_BLOCK_TAGGED) | + (1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE)); + + int err; + int port, errors = 0; + u8 vlan_control; + + if (mlx4_is_slave(dev)) { + mlx4_warn(dev, "Trying to update-qp in slave %d\n", + work->slave); + goto out; + } + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + goto out; + + if (!work->vlan_id) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; + else + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + + upd_context = mailbox->buf; + upd_context->primary_addr_path_mask = cpu_to_be64(qp_mask); + 
upd_context->qp_context.pri_path.vlan_control = vlan_control; + upd_context->qp_context.pri_path.vlan_index = work->vlan_ix; + + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(qp, tmp, qp_list, com.list) { + spin_unlock_irq(mlx4_tlock(dev)); + if (qp->com.owner == work->slave) { + if (qp->com.from_state != RES_QP_HW || + !qp->sched_queue || /* no INIT2RTR trans yet */ + mlx4_is_qp_reserved(dev, qp->local_qpn) || + qp->qpc_flags & (1 << MLX4_RSS_QPC_FLAG_OFFSET)) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + port = (qp->sched_queue >> 6 & 1) + 1; + if (port != work->port) { + spin_lock_irq(mlx4_tlock(dev)); + continue; + } + upd_context->qp_context.pri_path.sched_queue = + qp->sched_queue & 0xC7; + upd_context->qp_context.pri_path.sched_queue |= + ((work->qos & 0x7) << 3); + + err = mlx4_cmd(dev, mailbox->dm