aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/chelsio/cxgb4/sge.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c203
1 files changed, 148 insertions, 55 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ac311f5f3eb..dd4355d248e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -93,6 +93,16 @@
*/
#define TX_QCHECK_PERIOD (HZ / 2)
+/* SGE Hung Ingress DMA Warning Threshold time (in jiffies) and Warning Repeat
+ * Rate (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA
+ * State Machines in the same state for this amount of time then we'll issue a
+ * warning about a potential hang. While the SGE Ingress DMA Channel still
+ * appears to be hung, we'll repeat the warning every N RX_QCHECK_PERIODs until
+ * the situation clears. When the situation clears, we'll note that as well.
+ */
+#define SGE_IDMA_WARN_THRESH (1 * HZ)
+#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD)
+
/*
* Max number of Tx descriptors to be reclaimed by the Tx timer.
*/
@@ -373,7 +383,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
if (d->skb) { /* an SGL is present */
if (unmap)
unmap_sgl(dev, d->skb, d->sgl, q);
- kfree_skb(d->skb);
+ dev_consume_skb_any(d->skb);
d->skb = NULL;
}
++d;
@@ -509,7 +519,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
u32 val;
if (q->pend_cred >= 8) {
val = PIDX(q->pend_cred / 8);
- if (!is_t4(adap->chip))
+ if (!is_t4(adap->params.chip))
val |= DBTYPE(1);
wmb();
t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
@@ -706,11 +716,17 @@ static inline unsigned int flits_to_desc(unsigned int n)
* @skb: the packet
*
* Returns whether an Ethernet packet is small enough to fit as
- * immediate data.
+ * immediate data. The return value is the headroom required, or 0 if not.
*/
static inline int is_eth_imm(const struct sk_buff *skb)
{
- return skb->len <= MAX_IMM_TX_PKT_LEN - sizeof(struct cpl_tx_pkt);
+ int hdrlen = skb_shinfo(skb)->gso_size ?
+ sizeof(struct cpl_tx_pkt_lso_core) : 0;
+
+ hdrlen += sizeof(struct cpl_tx_pkt);
+ if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen)
+ return hdrlen;
+ return 0;
}
/**
@@ -723,9 +739,10 @@ static inline int is_eth_imm(const struct sk_buff *skb)
static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
{
unsigned int flits;
+ int hdrlen = is_eth_imm(skb);
- if (is_eth_imm(skb))
- return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt), 8);
+ if (hdrlen)
+ return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
if (skb_shinfo(skb)->gso_size)
@@ -843,11 +860,12 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
unsigned int *wr, index;
+ unsigned long flags;
wmb(); /* write descriptors before telling HW */
- spin_lock(&q->db_lock);
+ spin_lock_irqsave(&q->db_lock, flags);
if (!q->db_disabled) {
- if (is_t4(adap->chip)) {
+ if (is_t4(adap->params.chip)) {
t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
QID(q->cntxt_id) | PIDX(n));
} else {
@@ -861,9 +879,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
writel(n, adap->bar2 + q->udb + 8);
wmb();
}
- }
+ } else
+ q->db_pidx_inc += n;
q->db_pidx = q->pidx;
- spin_unlock(&q->db_lock);
+ spin_unlock_irqrestore(&q->db_lock, flags);
}
/**
@@ -971,6 +990,7 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n)
*/
netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ int len;
u32 wr_mid;
u64 cntrl, *end;
int qidx, credits;
@@ -982,13 +1002,14 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
struct cpl_tx_pkt_core *cpl;
const struct skb_shared_info *ssi;
dma_addr_t addr[MAX_SKB_FRAGS + 1];
+ bool immediate = false;
/*
* The chip min packet length is 10 octets but play safe and reject
* anything shorter than an Ethernet header.
*/
if (unlikely(skb->len < ETH_HLEN)) {
-out_free: dev_kfree_skb(skb);
+out_free: dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
@@ -1011,7 +1032,10 @@ out_free: dev_kfree_skb(skb);
return NETDEV_TX_BUSY;
}
- if (!is_eth_imm(skb) &&
+ if (is_eth_imm(skb))
+ immediate = true;
+
+ if (!immediate &&
unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
q->mapping_err++;
goto out_free;
@@ -1028,6 +1052,7 @@ out_free: dev_kfree_skb(skb);
wr->r3 = cpu_to_be64(0);
end = (u64 *)wr + flits;
+ len = immediate ? skb->len : 0;
ssi = skb_shinfo(skb);
if (ssi->gso_size) {
struct cpl_tx_pkt_lso *lso = (void *)wr;
@@ -1035,8 +1060,9 @@ out_free: dev_kfree_skb(skb);
int l3hdr_len = skb_network_header_len(skb);
int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
+ len += sizeof(*lso);
wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) |
- FW_WR_IMMDLEN(sizeof(*lso)));
+ FW_WR_IMMDLEN(len));
lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) |
LSO_FIRST_SLICE | LSO_LAST_SLICE |
LSO_IPV6(v6) |
@@ -1054,9 +1080,7 @@ out_free: dev_kfree_skb(skb);
q->tso++;
q->tx_cso += ssi->gso_segs;
} else {
- int len;
-
- len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl);
+ len += sizeof(*cpl);
wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) |
FW_WR_IMMDLEN(len));
cpl = (void *)(wr + 1);
@@ -1078,9 +1102,9 @@ out_free: dev_kfree_skb(skb);
cpl->len = htons(skb->len);
cpl->ctrl1 = cpu_to_be64(cntrl);
- if (is_eth_imm(skb)) {
+ if (immediate) {
inline_tx_skb(skb, &q->q, cpl + 1);
- dev_kfree_skb(skb);
+ dev_consume_skb_any(skb);
} else {
int last_desc;
@@ -1467,8 +1491,12 @@ static inline int ofld_send(struct adapter *adap, struct sk_buff *skb)
{
unsigned int idx = skb_txq(skb);
- if (unlikely(is_ctrl_pkt(skb)))
+ if (unlikely(is_ctrl_pkt(skb))) {
+ /* Single ctrl queue is a requirement for LE workaround path */
+ if (adap->tids.nsftids)
+ idx = 0;
return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
+ }
return ofld_xmit(&adap->sge.ofldtxq[idx], skb);
}
@@ -1596,7 +1624,7 @@ static noinline int handle_trace_pkt(struct adapter *adap,
return 0;
}
- if (is_t4(adap->chip))
+ if (is_t4(adap->params.chip))
__skb_pull(skb, sizeof(struct cpl_trace_pkt));
else
__skb_pull(skb, sizeof(struct cpl_t5_trace_pkt));
@@ -1630,7 +1658,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(skb, rxq->rspq.idx);
if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
- skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
+ skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
+ PKT_HASH_TYPE_L3);
if (unlikely(pkt->vlan_ex)) {
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
@@ -1661,14 +1690,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
const struct cpl_rx_pkt *pkt;
struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
struct sge *s = &q->adap->sge;
- int cpl_trace_pkt = is_t4(q->adap->chip) ?
+ int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
return handle_trace_pkt(q->adap, si);
pkt = (const struct cpl_rx_pkt *)rsp;
- csum_ok = pkt->csum_calc && !pkt->err_vec;
+ csum_ok = pkt->csum_calc && !pkt->err_vec &&
+ (q->netdev->features & NETIF_F_RXCSUM);
if ((pkt->l2info & htonl(RXF_TCP)) &&
(q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
do_gro(rxq, si, pkt);
@@ -1686,12 +1716,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
skb->protocol = eth_type_trans(skb, q->netdev);
skb_record_rx_queue(skb, q->idx);
if (skb->dev->features & NETIF_F_RXHASH)
- skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
+ skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
+ PKT_HASH_TYPE_L3);
rxq->stats.pkts++;
- if (csum_ok && (q->netdev->features & NETIF_F_RXCSUM) &&
- (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) {
+ if (csum_ok && (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) {
if (!pkt->ip_frag) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
rxq->stats.rx_cso++;
@@ -1990,7 +2020,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
static void sge_rx_timer_cb(unsigned long data)
{
unsigned long m;
- unsigned int i, cnt[2];
+ unsigned int i, idma_same_state_cnt[2];
struct adapter *adap = (struct adapter *)data;
struct sge *s = &adap->sge;
@@ -2001,7 +2031,7 @@ static void sge_rx_timer_cb(unsigned long data)
struct sge_fl *fl = s->egr_map[id];
clear_bit(id, s->starving_fl);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
if (fl_starving(fl)) {
rxq = container_of(fl, struct sge_eth_rxq, fl);
@@ -2013,21 +2043,64 @@ static void sge_rx_timer_cb(unsigned long data)
}
t4_write_reg(adap, SGE_DEBUG_INDEX, 13);
- cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH);
- cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
-
- for (i = 0; i < 2; i++)
- if (cnt[i] >= s->starve_thres) {
- if (s->idma_state[i] || cnt[i] == 0xffffffff)
- continue;
- s->idma_state[i] = 1;
- t4_write_reg(adap, SGE_DEBUG_INDEX, 11);
- m = t4_read_reg(adap, SGE_DEBUG_DATA_LOW) >> (i * 16);
- dev_warn(adap->pdev_dev,
- "SGE idma%u starvation detected for "
- "queue %lu\n", i, m & 0xffff);
- } else if (s->idma_state[i])
- s->idma_state[i] = 0;
+ idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH);
+ idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
+
+ for (i = 0; i < 2; i++) {
+ u32 debug0, debug11;
+
+ /* If the Ingress DMA Same State Counter ("timer") is less
+ * than 1s, then we can reset our synthesized Stall Timer and
+ * continue. If we have previously emitted warnings about a
+ * potential stalled Ingress Queue, issue a note indicating
+ * that the Ingress Queue has resumed forward progress.
+ */
+ if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
+ if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
+ CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n",
+ i, s->idma_qid[i],
+ s->idma_stalled[i]/HZ);
+ s->idma_stalled[i] = 0;
+ continue;
+ }
+
+ /* Synthesize an SGE Ingress DMA Same State Timer in the Hz
+ * domain. The first time we get here it'll be because we
+ * passed the 1s Threshold; each additional time it'll be
+ * because the RX Timer Callback is being fired on its regular
+ * schedule.
+ *
+ * If the stall is below our Potential Hung Ingress Queue
+ * Warning Threshold, continue.
+ */
+ if (s->idma_stalled[i] == 0)
+ s->idma_stalled[i] = HZ;
+ else
+ s->idma_stalled[i] += RX_QCHECK_PERIOD;
+
+ if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
+ continue;
+
+ /* We'll issue a warning every SGE_IDMA_WARN_REPEAT jiffies */
+ if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
+ continue;
+
+ /* Read and save the SGE IDMA State and Queue ID information.
+ * We do this every time in case it changes across time ...
+ */
+ t4_write_reg(adap, SGE_DEBUG_INDEX, 0);
+ debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
+ s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
+
+ t4_write_reg(adap, SGE_DEBUG_INDEX, 11);
+ debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
+ s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
+
+ CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
+ i, s->idma_qid[i], s->idma_state[i],
+ s->idma_stalled[i]/HZ, debug0, debug11);
+ t4_sge_decode_idma_state(adap, s->idma_state[i]);
+ }
mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}
@@ -2142,7 +2215,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
iq->cntxt_id = ntohs(c.iqid);
iq->abs_id = ntohs(c.physiqid);
iq->size--; /* subtract status entry */
- iq->adap = adap;
iq->netdev = dev;
iq->handler = hnd;
@@ -2182,7 +2254,7 @@ err:
static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
q->cntxt_id = id;
- if (!is_t4(adap->chip)) {
+ if (!is_t4(adap->params.chip)) {
unsigned int s_qpp;
unsigned short udb_density;
unsigned long qpshift;
@@ -2442,6 +2514,10 @@ void t4_free_sge_resources(struct adapter *adap)
if (oq->rspq.desc)
free_rspq_fl(adap, &oq->rspq, &oq->fl);
}
+ for (i = 0, oq = adap->sge.rdmaciq; i < adap->sge.rdmaciqs; i++, oq++) {
+ if (oq->rspq.desc)
+ free_rspq_fl(adap, &oq->rspq, &oq->fl);
+ }
/* clean up offload Tx queues */
for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
@@ -2578,11 +2654,19 @@ static int t4_sge_init_soft(struct adapter *adap)
fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);
+ /* We only bother using the Large Page logic if the Large Page Buffer
+ * is larger than our Page Size Buffer.
+ */
+ if (fl_large_pg <= fl_small_pg)
+ fl_large_pg = 0;
+
#undef READ_FL_BUF
+ /* The Page Size Buffer must be exactly equal to our Page Size and the
+ * Large Page Size Buffer should be 0 (per above) or a power of 2.
+ */
if (fl_small_pg != PAGE_SIZE ||
- (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg ||
- (fl_large_pg & (fl_large_pg-1)) != 0))) {
+ (fl_large_pg & (fl_large_pg-1)) != 0) {
dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n",
fl_small_pg, fl_large_pg);
return -EINVAL;
@@ -2641,7 +2725,7 @@ static int t4_sge_init_hard(struct adapter *adap)
* Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
* and generate an interrupt when this occurs so we can recover.
*/
- if (is_t4(adap->chip)) {
+ if (is_t4(adap->params.chip)) {
t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS,
V_HP_INT_THRESH(M_HP_INT_THRESH) |
V_LP_INT_THRESH(M_LP_INT_THRESH),
@@ -2697,8 +2781,8 @@ static int t4_sge_init_hard(struct adapter *adap)
int t4_sge_init(struct adapter *adap)
{
struct sge *s = &adap->sge;
- u32 sge_control;
- int ret;
+ u32 sge_control, sge_conm_ctrl;
+ int ret, egress_threshold;
/*
* Ingress Padding Boundary and Egress Status Page Size are set up by
@@ -2723,15 +2807,24 @@ int t4_sge_init(struct adapter *adap)
* SGE's Egress Congestion Threshold. If it isn't, then we can get
* stuck waiting for new packets while the SGE is waiting for us to
* give it more Free List entries. (Note that the SGE's Egress
- * Congestion Threshold is in units of 2 Free List pointers.)
+ * Congestion Threshold is in units of 2 Free List pointers.) For T4,
+ * there was only a single field to control this. For T5 there's the
+ * original field which now only applies to Unpacked Mode Free List
+ * buffers and a new field which only applies to Packed Mode Free List
+ * buffers.
*/
- s->fl_starve_thres
- = EGRTHRESHOLD_GET(t4_read_reg(adap, SGE_CONM_CTRL))*2 + 1;
+ sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL);
+ if (is_t4(adap->params.chip))
+ egress_threshold = EGRTHRESHOLD_GET(sge_conm_ctrl);
+ else
+ egress_threshold = EGRTHRESHOLDPACKING_GET(sge_conm_ctrl);
+ s->fl_starve_thres = 2*egress_threshold + 1;
setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
- s->starve_thres = core_ticks_per_usec(adap) * 1000000; /* 1 s */
- s->idma_state[0] = s->idma_state[1] = 0;
+ s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */
+ s->idma_stalled[0] = 0;
+ s->idma_stalled[1] = 0;
spin_lock_init(&s->intrq_lock);
return 0;