diff options
Diffstat (limited to 'drivers/net/ethernet/chelsio/cxgb4/sge.c')
| -rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/sge.c | 203 |
1 files changed, 148 insertions, 55 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ac311f5f3eb..dd4355d248e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -93,6 +93,16 @@ */ #define TX_QCHECK_PERIOD (HZ / 2) +/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate + * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA + * State Machines in the same state for this amount of time (in HZ) then we'll + * issue a warning about a potential hang. We'll repeat the warning as the + * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till + * the situation clears. If the situation clears, we'll note that as well. + */ +#define SGE_IDMA_WARN_THRESH (1 * HZ) +#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) + /* * Max number of Tx descriptors to be reclaimed by the Tx timer. */ @@ -373,7 +383,7 @@ static void free_tx_desc(struct adapter *adap, struct sge_txq *q, if (d->skb) { /* an SGL is present */ if (unmap) unmap_sgl(dev, d->skb, d->sgl, q); - kfree_skb(d->skb); + dev_consume_skb_any(d->skb); d->skb = NULL; } ++d; @@ -509,7 +519,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) u32 val; if (q->pend_cred >= 8) { val = PIDX(q->pend_cred / 8); - if (!is_t4(adap->chip)) + if (!is_t4(adap->params.chip)) val |= DBTYPE(1); wmb(); t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) | @@ -706,11 +716,17 @@ static inline unsigned int flits_to_desc(unsigned int n) * @skb: the packet * * Returns whether an Ethernet packet is small enough to fit as - * immediate data. + * immediate data. Return value corresponds to headroom required. */ static inline int is_eth_imm(const struct sk_buff *skb) { - return skb->len <= MAX_IMM_TX_PKT_LEN - sizeof(struct cpl_tx_pkt); + int hdrlen = skb_shinfo(skb)->gso_size ? + sizeof(struct cpl_tx_pkt_lso_core) : 0; + + hdrlen += sizeof(struct cpl_tx_pkt); + if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen) + return hdrlen; + return 0; } /** @@ -723,9 +739,10 @@ static inline int is_eth_imm(const struct sk_buff *skb) static inline unsigned int calc_tx_flits(const struct sk_buff *skb) { unsigned int flits; + int hdrlen = is_eth_imm(skb); - if (is_eth_imm(skb)) - return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt), 8); + if (hdrlen) + return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64)); flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4; if (skb_shinfo(skb)->gso_size) @@ -843,11 +860,12 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src) static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) { unsigned int *wr, index; + unsigned long flags; wmb(); /* write descriptors before telling HW */ - spin_lock(&q->db_lock); + spin_lock_irqsave(&q->db_lock, flags); if (!q->db_disabled) { - if (is_t4(adap->chip)) { + if (is_t4(adap->params.chip)) { t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), QID(q->cntxt_id) | PIDX(n)); } else { @@ -861,9 +879,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n) writel(n, adap->bar2 + q->udb + 8); wmb(); } - } + } else + q->db_pidx_inc += n; q->db_pidx = q->pidx; - spin_unlock(&q->db_lock); + spin_unlock_irqrestore(&q->db_lock, flags); } /** @@ -971,6 +990,7 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n) */ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) { + int len; u32 wr_mid; u64 cntrl, *end; int qidx, credits; @@ -982,13 +1002,14 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev) struct cpl_tx_pkt_core *cpl; const struct skb_shared_info *ssi; dma_addr_t addr[MAX_SKB_FRAGS + 1]; + bool immediate = false; /* * The chip min packet length is 10 octets but play safe and reject * anything shorter than an Ethernet header. */ if (unlikely(skb->len < ETH_HLEN)) { -out_free: dev_kfree_skb(skb); +out_free: dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1011,7 +1032,10 @@ out_free: dev_kfree_skb(skb); return NETDEV_TX_BUSY; } - if (!is_eth_imm(skb) && + if (is_eth_imm(skb)) + immediate = true; + + if (!immediate && unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) { q->mapping_err++; goto out_free; @@ -1028,6 +1052,7 @@ out_free: dev_kfree_skb(skb); wr->r3 = cpu_to_be64(0); end = (u64 *)wr + flits; + len = immediate ? skb->len : 0; ssi = skb_shinfo(skb); if (ssi->gso_size) { struct cpl_tx_pkt_lso *lso = (void *)wr; @@ -1035,8 +1060,9 @@ out_free: dev_kfree_skb(skb); int l3hdr_len = skb_network_header_len(skb); int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN; + len += sizeof(*lso); wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | - FW_WR_IMMDLEN(sizeof(*lso))); + FW_WR_IMMDLEN(len)); lso->c.lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) | LSO_FIRST_SLICE | LSO_LAST_SLICE | LSO_IPV6(v6) | @@ -1054,9 +1080,7 @@ out_free: dev_kfree_skb(skb); q->tso++; q->tx_cso += ssi->gso_segs; } else { - int len; - - len = is_eth_imm(skb) ? skb->len + sizeof(*cpl) : sizeof(*cpl); + len += sizeof(*cpl); wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) | FW_WR_IMMDLEN(len)); cpl = (void *)(wr + 1); @@ -1078,9 +1102,9 @@ out_free: dev_kfree_skb(skb); cpl->len = htons(skb->len); cpl->ctrl1 = cpu_to_be64(cntrl); - if (is_eth_imm(skb)) { + if (immediate) { inline_tx_skb(skb, &q->q, cpl + 1); - dev_kfree_skb(skb); + dev_consume_skb_any(skb); } else { int last_desc; @@ -1467,8 +1491,12 @@ static inline int ofld_send(struct adapter *adap, struct sk_buff *skb) { unsigned int idx = skb_txq(skb); - if (unlikely(is_ctrl_pkt(skb))) + if (unlikely(is_ctrl_pkt(skb))) { + /* Single ctrl queue is a requirement for LE workaround path */ + if (adap->tids.nsftids) + idx = 0; return ctrl_xmit(&adap->sge.ctrlq[idx], skb); + } return ofld_xmit(&adap->sge.ofldtxq[idx], skb); } @@ -1596,7 +1624,7 @@ static noinline int handle_trace_pkt(struct adapter *adap, return 0; } - if (is_t4(adap->chip)) + if (is_t4(adap->params.chip)) __skb_pull(skb, sizeof(struct cpl_trace_pkt)); else __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt)); @@ -1630,7 +1658,8 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl, skb->ip_summed = CHECKSUM_UNNECESSARY; skb_record_rx_queue(skb, rxq->rspq.idx); if (rxq->rspq.netdev->features & NETIF_F_RXHASH) - skb->rxhash = (__force u32)pkt->rsshdr.hash_val; + skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, + PKT_HASH_TYPE_L3); if (unlikely(pkt->vlan_ex)) { __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan)); @@ -1661,14 +1690,15 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, const struct cpl_rx_pkt *pkt; struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq); struct sge *s = &q->adap->sge; - int cpl_trace_pkt = is_t4(q->adap->chip) ? + int cpl_trace_pkt = is_t4(q->adap->params.chip) ? CPL_TRACE_PKT : CPL_TRACE_PKT_T5; if (unlikely(*(u8 *)rsp == cpl_trace_pkt)) return handle_trace_pkt(q->adap, si); pkt = (const struct cpl_rx_pkt *)rsp; - csum_ok = pkt->csum_calc && !pkt->err_vec; + csum_ok = pkt->csum_calc && !pkt->err_vec && + (q->netdev->features & NETIF_F_RXCSUM); if ((pkt->l2info & htonl(RXF_TCP)) && (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) { do_gro(rxq, si, pkt); @@ -1686,12 +1716,12 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp, skb->protocol = eth_type_trans(skb, q->netdev); skb_record_rx_queue(skb, q->idx); if (skb->dev->features & NETIF_F_RXHASH) - skb->rxhash = (__force u32)pkt->rsshdr.hash_val; + skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val, + PKT_HASH_TYPE_L3); rxq->stats.pkts++; - if (csum_ok && (q->netdev->features & NETIF_F_RXCSUM) && - (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) { + if (csum_ok && (pkt->l2info & htonl(RXF_UDP | RXF_TCP))) { if (!pkt->ip_frag) { skb->ip_summed = CHECKSUM_UNNECESSARY; rxq->stats.rx_cso++; @@ -1990,7 +2020,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap) static void sge_rx_timer_cb(unsigned long data) { unsigned long m; - unsigned int i, cnt[2]; + unsigned int i, idma_same_state_cnt[2]; struct adapter *adap = (struct adapter *)data; struct sge *s = &adap->sge; @@ -2001,7 +2031,7 @@ static void sge_rx_timer_cb(unsigned long data) struct sge_fl *fl = s->egr_map[id]; clear_bit(id, s->starving_fl); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); if (fl_starving(fl)) { rxq = container_of(fl, struct sge_eth_rxq, fl); @@ -2013,21 +2043,64 @@ static void sge_rx_timer_cb(unsigned long data) } t4_write_reg(adap, SGE_DEBUG_INDEX, 13); - cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH); - cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW); - - for (i = 0; i < 2; i++) - if (cnt[i] >= s->starve_thres) { - if (s->idma_state[i] || cnt[i] == 0xffffffff) - continue; - s->idma_state[i] = 1; - t4_write_reg(adap, SGE_DEBUG_INDEX, 11); - m = t4_read_reg(adap, SGE_DEBUG_DATA_LOW) >> (i * 16); - dev_warn(adap->pdev_dev, - "SGE idma%u starvation detected for " - "queue %lu\n", i, m & 0xffff); - } else if (s->idma_state[i]) - s->idma_state[i] = 0; + idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH); + idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW); + + for (i = 0; i < 2; i++) { + u32 debug0, debug11; + + /* If the Ingress DMA Same State Counter ("timer") is less + * than 1s, then we can reset our synthesized Stall Timer and + * continue. If we have previously emitted warnings about a + * potential stalled Ingress Queue, issue a note indicating + * that the Ingress Queue has resumed forward progress. + */ + if (idma_same_state_cnt[i] < s->idma_1s_thresh) { + if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH) + CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n", + i, s->idma_qid[i], + s->idma_stalled[i]/HZ); + s->idma_stalled[i] = 0; + continue; + } + + /* Synthesize an SGE Ingress DMA Same State Timer in the Hz + * domain. The first time we get here it'll be because we + * passed the 1s Threshold; each additional time it'll be + * because the RX Timer Callback is being fired on its regular + * schedule. + * + * If the stall is below our Potential Hung Ingress Queue + * Warning Threshold, continue. + */ + if (s->idma_stalled[i] == 0) + s->idma_stalled[i] = HZ; + else + s->idma_stalled[i] += RX_QCHECK_PERIOD; + + if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH) + continue; + + /* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */ + if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0) + continue; + + /* Read and save the SGE IDMA State and Queue ID information. + * We do this every time in case it changes across time ... + */ + t4_write_reg(adap, SGE_DEBUG_INDEX, 0); + debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW); + s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f; + + t4_write_reg(adap, SGE_DEBUG_INDEX, 11); + debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW); + s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff; + + CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n", + i, s->idma_qid[i], s->idma_state[i], + s->idma_stalled[i]/HZ, debug0, debug11); + t4_sge_decode_idma_state(adap, s->idma_state[i]); + } mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD); } @@ -2142,7 +2215,6 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->cntxt_id = ntohs(c.iqid); iq->abs_id = ntohs(c.physiqid); iq->size--; /* subtract status entry */ - iq->adap = adap; iq->netdev = dev; iq->handler = hnd; @@ -2182,7 +2254,7 @@ err: static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id) { q->cntxt_id = id; - if (!is_t4(adap->chip)) { + if (!is_t4(adap->params.chip)) { unsigned int s_qpp; unsigned short udb_density; unsigned long qpshift; @@ -2442,6 +2514,10 @@ void t4_free_sge_resources(struct adapter *adap) if (oq->rspq.desc) free_rspq_fl(adap, &oq->rspq, &oq->fl); } + for (i = 0, oq = adap->sge.rdmaciq; i < adap->sge.rdmaciqs; i++, oq++) { + if (oq->rspq.desc) + free_rspq_fl(adap, &oq->rspq, &oq->fl); + } /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { @@ -2578,11 +2654,19 @@ static int t4_sge_init_soft(struct adapter *adap) fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF); fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF); + /* We only bother using the Large Page logic if the Large Page Buffer + * is larger than our Page Size Buffer. + */ + if (fl_large_pg <= fl_small_pg) + fl_large_pg = 0; + #undef READ_FL_BUF + /* The Page Size Buffer must be exactly equal to our Page Size and the + * Large Page Size Buffer should be 0 (per above) or a power of 2. + */ if (fl_small_pg != PAGE_SIZE || - (fl_large_pg != 0 && (fl_large_pg <= fl_small_pg || - (fl_large_pg & (fl_large_pg-1)) != 0))) { + (fl_large_pg & (fl_large_pg-1)) != 0) { dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n", fl_small_pg, fl_large_pg); return -EINVAL; @@ -2641,7 +2725,7 @@ static int t4_sge_init_hard(struct adapter *adap) * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows * and generate an interrupt when this occurs so we can recover. */ - if (is_t4(adap->chip)) { + if (is_t4(adap->params.chip)) { t4_set_reg_field(adap, A_SGE_DBFIFO_STATUS, V_HP_INT_THRESH(M_HP_INT_THRESH) | V_LP_INT_THRESH(M_LP_INT_THRESH), @@ -2697,8 +2781,8 @@ static int t4_sge_init_hard(struct adapter *adap) int t4_sge_init(struct adapter *adap) { struct sge *s = &adap->sge; - u32 sge_control; - int ret; + u32 sge_control, sge_conm_ctrl; + int ret, egress_threshold; /* * Ingress Padding Boundary and Egress Status Page Size are set up by @@ -2723,15 +2807,24 @@ int t4_sge_init(struct adapter *adap) * SGE's Egress Congestion Threshold. If it isn't, then we can get * stuck waiting for new packets while the SGE is waiting for us to * give it more Free List entries. (Note that the SGE's Egress - * Congestion Threshold is in units of 2 Free List pointers.) + * Congestion Threshold is in units of 2 Free List pointers.) For T4, + * there was only a single field to control this. For T5 there's the + * original field which now only applies to Unpacked Mode Free List + * buffers and a new field which only applies to Packed Mode Free List + * buffers. */ - s->fl_starve_thres - = EGRTHRESHOLD_GET(t4_read_reg(adap, SGE_CONM_CTRL))*2 + 1; + sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL); + if (is_t4(adap->params.chip)) + egress_threshold = EGRTHRESHOLD_GET(sge_conm_ctrl); + else + egress_threshold = EGRTHRESHOLDPACKING_GET(sge_conm_ctrl); + s->fl_starve_thres = 2*egress_threshold + 1; setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap); setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap); - s->starve_thres = core_ticks_per_usec(adap) * 1000000; /* 1 s */ - s->idma_state[0] = s->idma_state[1] = 0; + s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */ + s->idma_stalled[0] = 0; + s->idma_stalled[1] = 0; spin_lock_init(&s->intrq_lock); return 0; |
