diff options
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
| -rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1109 |
1 files changed, 818 insertions, 291 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 49d2cfa9b0c..e49f31dbd5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 Intel Corporation. + * Copyright(c) 2013 - 2014 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -12,9 +12,8 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. * * The full GNU General Public License is included in this distribution in * the file called "COPYING". @@ -25,7 +24,9 @@ * ******************************************************************************/ +#include <linux/prefetch.h> #include "i40e.h" +#include "i40e_prototype.h" static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, u32 td_tag) @@ -37,19 +38,22 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT)); } +#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) /** * i40e_program_fdir_filter - Program a Flow Director filter - * @fdir_input: Packet data that will be filter parameters + * @fdir_data: Packet data that will be filter parameters + * @raw_packet: the pre-allocated packet buffer for FDir * @pf: The pf pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, +int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, struct i40e_pf *pf, bool add) { struct i40e_filter_program_desc *fdir_desc; struct i40e_tx_buffer *tx_buf; struct i40e_tx_desc *tx_desc; struct i40e_ring *tx_ring; + unsigned int fpt, dcc; struct i40e_vsi *vsi; struct device *dev; dma_addr_t dma; @@ -58,98 +62,90 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, /* find existing FDIR VSI */ vsi = NULL; - for (i = 0; i < pf->hw.func_caps.num_vsis; i++) + for (i = 0; i < pf->num_alloc_vsi; i++) if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) vsi = pf->vsi[i]; if (!vsi) return -ENOENT; - tx_ring = &vsi->tx_rings[0]; + tx_ring = vsi->tx_rings[0]; dev = tx_ring->dev; - dma = dma_map_single(dev, fdir_data->raw_packet, - I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE); + dma = dma_map_single(dev, raw_packet, + I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto dma_fail; /* grab the next descriptor */ - fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use); - tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use]; - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0; - fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index - << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) - & I40E_TXD_FLTR_QW0_QINDEX_MASK); + fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & + I40E_TXD_FLTR_QW0_QINDEX_MASK; - fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->flex_off - << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) - & I40E_TXD_FLTR_QW0_FLEXOFF_MASK); + fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & + I40E_TXD_FLTR_QW0_FLEXOFF_MASK; - fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->pctype - << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) - & I40E_TXD_FLTR_QW0_PCTYPE_MASK); + fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & + I40E_TXD_FLTR_QW0_PCTYPE_MASK; /* Use LAN VSI Id if not programmed by user */ if (fdir_data->dest_vsi == 0) - fdir_desc->qindex_flex_ptype_vsi |= - cpu_to_le32((pf->vsi[pf->lan_vsi]->id) - << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + fpt |= (pf->vsi[pf->lan_vsi]->id) << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; else - fdir_desc->qindex_flex_ptype_vsi |= - cpu_to_le32((fdir_data->dest_vsi - << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) - & I40E_TXD_FLTR_QW0_DEST_VSI_MASK); + fpt |= ((u32)fdir_data->dest_vsi << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & + I40E_TXD_FLTR_QW0_DEST_VSI_MASK; + + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); - fdir_desc->dtype_cmd_cntindex = - cpu_to_le32(I40E_TX_DESC_DTYPE_FILTER_PROG); + dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; if (add) - fdir_desc->dtype_cmd_cntindex |= cpu_to_le32( - I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE - << I40E_TXD_FLTR_QW1_PCMD_SHIFT); + dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; else - fdir_desc->dtype_cmd_cntindex |= cpu_to_le32( - I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE - << I40E_TXD_FLTR_QW1_PCMD_SHIFT); + dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; - fdir_desc->dtype_cmd_cntindex |= cpu_to_le32((fdir_data->dest_ctl - << I40E_TXD_FLTR_QW1_DEST_SHIFT) - & I40E_TXD_FLTR_QW1_DEST_MASK); + dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & + I40E_TXD_FLTR_QW1_DEST_MASK; - fdir_desc->dtype_cmd_cntindex |= cpu_to_le32( - (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) - & I40E_TXD_FLTR_QW1_FD_STATUS_MASK); + dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & + I40E_TXD_FLTR_QW1_FD_STATUS_MASK; if (fdir_data->cnt_index != 0) { - fdir_desc->dtype_cmd_cntindex |= - cpu_to_le32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK); - fdir_desc->dtype_cmd_cntindex |= - cpu_to_le32((fdir_data->cnt_index - << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) - & I40E_TXD_FLTR_QW1_CNTINDEX_MASK); + dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dcc |= ((u32)fdir_data->cnt_index << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; } + fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); /* Now program a dummy descriptor */ - tx_desc = I40E_TX_DESC(tx_ring, tx_ring->next_to_use); - tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use]; - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + tx_desc = I40E_TX_DESC(tx_ring, i); + tx_buf = &tx_ring->tx_bi[i]; + + tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE); + dma_unmap_addr_set(tx_buf, dma, dma); tx_desc->buffer_addr = cpu_to_le64(dma); - td_cmd = I40E_TX_DESC_CMD_EOP | - I40E_TX_DESC_CMD_RS | - I40E_TX_DESC_CMD_DUMMY; + td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0); + build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0); - /* Mark the data descriptor to be watched */ - tx_buf->next_to_watch = tx_desc; + /* set the timestamp */ + tx_buf->time_stamp = jiffies; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -158,6 +154,9 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, */ wmb(); + /* Mark the data descriptor to be watched */ + tx_buf->next_to_watch = tx_desc; + writel(tx_ring->next_to_use, tx_ring->tail); return 0; @@ -165,50 +164,349 @@ dma_fail: return -1; } +#define IP_HEADER_OFFSET 14 +#define I40E_UDPIP_DUMMY_PACKET_LEN 42 +/** + * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct udphdr *udp; + struct iphdr *ip; + bool err = false; + int ret; + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN); + + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET + + sizeof(struct iphdr)); + + ip->daddr = fd_data->dst_ip[0]; + udp->dest = fd_data->dst_port; + ip->saddr = fd_data->src_ip[0]; + udp->source = fd_data->src_port; + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, + "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + return err ? -EOPNOTSUPP : 0; +} + +#define I40E_TCPIP_DUMMY_PACKET_LEN 54 +/** + * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct tcphdr *tcp; + struct iphdr *ip; + bool err = false; + int ret; + /* Dummy packet */ + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11, + 0x0, 0x72, 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN); + + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET + + sizeof(struct iphdr)); + + ip->daddr = fd_data->dst_ip[0]; + tcp->dest = fd_data->dst_port; + ip->saddr = fd_data->src_ip[0]; + tcp->source = fd_data->src_port; + + if (add) { + if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + } + } + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + return err ? -EOPNOTSUPP : 0; +} + +/** + * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for + * a specific flow spec + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Always returns -EOPNOTSUPP + **/ +static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + return -EOPNOTSUPP; +} + +#define I40E_IP_DUMMY_PACKET_LEN 34 +/** + * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for + * a specific flow spec + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct iphdr *ip; + bool err = false; + int ret; + int i; + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN); + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + + ip->saddr = fd_data->src_ip[0]; + ip->daddr = fd_data->dst_ip[0]; + ip->protocol = 0; + + for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; + i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) { + fd_data->pctype = i; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, + "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + } + + return err ? -EOPNOTSUPP : 0; +} + +/** + * i40e_add_del_fdir - Build raw packets to add/del fdir filter + * @vsi: pointer to the targeted VSI + * @cmd: command to get or set RX flow classification rules + * @add: true adds a filter, false removes it + * + **/ +int i40e_add_del_fdir(struct i40e_vsi *vsi, + struct i40e_fdir_filter *input, bool add) +{ + struct i40e_pf *pf = vsi->back; + u8 *raw_packet; + int ret; + + /* Populate the Flow Director that we have at the moment + * and allocate the raw packet buffer for the calling functions + */ + raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); + if (!raw_packet) + return -ENOMEM; + + switch (input->flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + ret = i40e_add_del_fdir_tcpv4(vsi, input, raw_packet, + add); + break; + case UDP_V4_FLOW: + ret = i40e_add_del_fdir_udpv4(vsi, input, raw_packet, + add); + break; + case SCTP_V4_FLOW: + ret = i40e_add_del_fdir_sctpv4(vsi, input, raw_packet, + add); + break; + case IPV4_FLOW: + ret = i40e_add_del_fdir_ipv4(vsi, input, raw_packet, + add); + break; + case IP_USER_FLOW: + switch (input->ip4_proto) { + case IPPROTO_TCP: + ret = i40e_add_del_fdir_tcpv4(vsi, input, + raw_packet, add); + break; + case IPPROTO_UDP: + ret = i40e_add_del_fdir_udpv4(vsi, input, + raw_packet, add); + break; + case IPPROTO_SCTP: + ret = i40e_add_del_fdir_sctpv4(vsi, input, + raw_packet, add); + break; + default: + ret = i40e_add_del_fdir_ipv4(vsi, input, + raw_packet, add); + break; + } + break; + default: + dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", + input->flow_type); + ret = -EINVAL; + } + + kfree(raw_packet); + return ret; +} + /** * i40e_fd_handle_status - check the Programming Status for FD * @rx_ring: the Rx ring for this descriptor - * @qw: the descriptor data + * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor. * @prog_id: the id originally used for programming * * This is used to verify if the FD programming or invalidation * requested by SW to the HW is successful or not and take actions accordingly. **/ -static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id) +static void i40e_fd_handle_status(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, u8 prog_id) { - struct pci_dev *pdev = rx_ring->vsi->back->pdev; + struct i40e_pf *pf = rx_ring->vsi->back; + struct pci_dev *pdev = pf->pdev; + u32 fcnt_prog, fcnt_avail; u32 error; + u64 qw; + qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; - /* for now just print the Status */ - dev_info(&pdev->dev, "FD programming id %02x, Status %08x\n", - prog_id, error); + if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { + dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n", + rx_desc->wb.qword0.hi_dword.fd_id); + + /* filter programming failed most likely due to table full */ + fcnt_prog = i40e_get_current_fd_count(pf); + fcnt_avail = i40e_get_fd_cnt_all(pf); + /* If ATR is running fcnt_prog can quickly change, + * if we are very close to full, it makes sense to disable + * FD ATR/SB and then re-enable it when there is room. + */ + if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { + /* Turn off ATR first */ + if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { + pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + dev_warn(&pdev->dev, "FD filter space full, ATR for further flows will be turned off\n"); + pf->auto_disable_flags |= + I40E_FLAG_FD_ATR_ENABLED; + pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT; + } else if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); + pf->auto_disable_flags |= + I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT; + } + } else { + dev_info(&pdev->dev, "FD filter programming error\n"); + } + } else if (error == + (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pdev->dev, "ntuple filter loc = %d, could not be removed\n", + rx_desc->wb.qword0.hi_dword.fd_id); + } } /** - * i40e_unmap_tx_resource - Release a Tx buffer + * i40e_unmap_and_free_tx_resource - Release a Tx buffer * @ring: the ring that owns the buffer * @tx_buffer: the buffer to free **/ -static inline void i40e_unmap_tx_resource(struct i40e_ring *ring, - struct i40e_tx_buffer *tx_buffer) +static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, + struct i40e_tx_buffer *tx_buffer) { - if (tx_buffer->dma) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, - DMA_TO_DEVICE); - else + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); } - tx_buffer->dma = 0; - tx_buffer->time_stamp = 0; + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* tx_buffer must be completely set up in the transmit path */ } /** @@ -217,7 +515,6 @@ static inline void i40e_unmap_tx_resource(struct i40e_ring *ring, **/ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) { - struct i40e_tx_buffer *tx_buffer; unsigned long bi_size; u16 i; @@ -226,13 +523,8 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { - tx_buffer = &tx_ring->tx_bi[i]; - i40e_unmap_tx_resource(tx_ring, tx_buffer); - if (tx_buffer->skb) - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; - } + for (i = 0; i < tx_ring->count; i++) + i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; memset(tx_ring->tx_bi, 0, bi_size); @@ -242,6 +534,13 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + + if (!tx_ring->netdev) + return; + + /* cleanup Tx queue statistics */ + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)); } /** @@ -300,14 +599,14 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) && + if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } else { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets; + tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } @@ -315,6 +614,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_clean_tx_irq - Reclaim resources after transmit completes * @tx_ring: tx ring to clean * @budget: how many cleans we're allowed @@ -325,68 +638,96 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) { u16 i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; + struct i40e_tx_desc *tx_head; struct i40e_tx_desc *tx_desc; unsigned int total_packets = 0; unsigned int total_bytes = 0; tx_buf = &tx_ring->tx_bi[i]; tx_desc = I40E_TX_DESC(tx_ring, i); + i -= tx_ring->count; - for (; budget; budget--) { - struct i40e_tx_desc *eop_desc; + tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); - eop_desc = tx_buf->next_to_watch; + do { + struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; - /* if the descriptor isn't done, no work yet to do */ - if (!(eop_desc->cmd_type_offset_bsz & - cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + + /* we have caught up to head, no work left to do */ + if (tx_head == tx_desc) break; - /* count the packet as being completed */ - tx_ring->tx_stats.completed++; + /* clear next_to_watch to prevent false hangs */ tx_buf->next_to_watch = NULL; - tx_buf->time_stamp = 0; - /* set memory barrier before eop_desc is verified */ - rmb(); + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_packets += tx_buf->gso_segs; - do { - i40e_unmap_tx_resource(tx_ring, tx_buf); + /* free the skb */ + dev_kfree_skb_any(tx_buf->skb); - /* clear dtype status */ - tx_desc->cmd_type_offset_bsz &= - ~cpu_to_le64(I40E_TXD_QW1_DTYPE_MASK); + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); - if (likely(tx_desc == eop_desc)) { - eop_desc = NULL; + /* clear tx_buffer data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); - dev_kfree_skb_any(tx_buf->skb); - tx_buf->skb = NULL; - - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; - } + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { tx_buf++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; + if (unlikely(!i)) { + i -= tx_ring->count; tx_buf = tx_ring->tx_bi; tx_desc = I40E_TX_DESC(tx_ring, 0); } - } while (eop_desc); - } + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_bi; + tx_desc = I40E_TX_DESC(tx_ring, 0); + } + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; tx_ring->next_to_clean = i; - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.packets += total_packets; + u64_stats_update_end(&tx_ring->syncp); tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" @@ -414,6 +755,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) return true; } + netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + total_packets, total_bytes); + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -524,8 +869,6 @@ static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector) i40e_set_new_dynamic_itr(&q_vector->tx); if (old_itr != q_vector->tx.itr) wr32(hw, reg_addr, q_vector->tx.itr); - - i40e_flush(hw); } /** @@ -549,7 +892,7 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS) - i40e_fd_handle_status(rx_ring, qw, id); + i40e_fd_handle_status(rx_ring, rx_desc, id); } /** @@ -573,6 +916,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); + /* add u32 for head writeback, align after this takes care of + * guaranteeing this is at least one cache line in size + */ + tx_ring->size += sizeof(u32); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); @@ -746,7 +1093,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) skb = netdev_alloc_skb_ip_align(rx_ring->netdev, rx_ring->rx_buf_len); if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; } /* initialize queue mapping */ @@ -760,7 +1107,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) rx_ring->rx_buf_len, DMA_FROM_DEVICE); if (dma_mapping_error(rx_ring->dev, bi->dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; goto no_buffers; } @@ -770,7 +1117,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { - rx_ring->rx_stats.alloc_rx_page_failed++; + rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } } @@ -785,7 +1132,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) DMA_FROM_DEVICE); if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { - rx_ring->rx_stats.alloc_rx_page_failed++; + rx_ring->rx_stats.alloc_page_failed++; bi->page_dma = 0; goto no_buffers; } @@ -838,27 +1185,107 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, * @skb: skb currently being received and modified * @rx_status: status value of last descriptor in packet * @rx_error: error value of last descriptor in packet + * @rx_ptype: ptype value of last descriptor in packet **/ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, u32 rx_status, - u32 rx_error) + u32 rx_error, + u16 rx_ptype) { + struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); + bool ipv4 = false, ipv6 = false; + bool ipv4_tunnel, ipv6_tunnel; + __wsum rx_udp_csum; + struct iphdr *iph; + __sum16 csum; + + ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + + skb->encapsulation = ipv4_tunnel || ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM && - rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) + if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + return; + + /* did the hardware decode the packet and checksum? */ + if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) + return; + + /* both known and outer_ip must be set for the below code to work */ + if (!(decoded.known && decoded.outer_ip)) return; - /* IP or L4 checksum error */ - if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | - (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { - vsi->back->hw_csum_rx_error++; + if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) + ipv4 = true; + else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) + ipv6 = true; + + if (ipv4 && + (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | + (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) + goto checksum_fail; + + /* likely incorrect csum if alternate IP extension headers found */ + if (ipv6 && + decoded.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP && + rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT) && + rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + /* don't increment checksum err here, non-fatal err */ return; + + /* there was some L4 error, count error and punt packet to the stack */ + if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) + goto checksum_fail; + + /* handle packets that were not able to be checksummed due + * to arrival speed, in this case the stack can compute + * the csum. + */ + if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) + return; + + /* If VXLAN traffic has an outer UDPv4 checksum we need to check + * it in the driver, hardware does not do it for us. + * Since L3L4P bit was set we assume a valid IHL value (>=5) + * so the total length of IPv4 header is IHL*4 bytes + * The UDP_0 bit *may* bet set if the *inner* header is UDP + */ + if (ipv4_tunnel && + (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) && + !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) { + skb->transport_header = skb->mac_header + + sizeof(struct ethhdr) + + (ip_hdr(skb)->ihl * 4); + + /* Add 4 bytes for VLAN tagged packets */ + skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) + ? VLAN_HLEN : 0; + + rx_udp_csum = udp_csum(skb); + iph = ip_hdr(skb); + csum = csum_tcpudp_magic( + iph->saddr, iph->daddr, + (skb->len - skb_transport_offset(skb)), + IPPROTO_UDP, rx_udp_csum); + + if (udp_hdr(skb)->check != csum) + goto checksum_fail; } skb->ip_summed = CHECKSUM_UNNECESSARY; + + return; + +checksum_fail: + vsi->back->hw_csum_rx_error++; } /** @@ -869,13 +1296,38 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, static inline u32 i40e_rx_hash(struct i40e_ring *ring, union i40e_rx_desc *rx_desc) { - if (ring->netdev->features & NETIF_F_RXHASH) { - if ((le64_to_cpu(rx_desc->wb.qword1.status_error_len) >> - I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) & - I40E_RX_DESC_FLTSTAT_RSS_HASH) - return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - } - return 0; + const __le64 rss_mask = + cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << + I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); + + if ((ring->netdev->features & NETIF_F_RXHASH) && + (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) + return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); + else + return 0; +} + +/** + * i40e_ptype_to_hash - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + **/ +static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype) +{ + struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); + + if (!decoded.known) + return PKT_HASH_TYPE_NONE; + + if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) + return PKT_HASH_TYPE_L4; + else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) + return PKT_HASH_TYPE_L3; + else + return PKT_HASH_TYPE_L2; } /** @@ -895,12 +1347,16 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + u8 rx_ptype; u64 qword; + if (budget <= 0) + return 0; + rx_desc = I40E_RX_DESC(rx_ring, i); qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) - >> I40E_RXD_QW1_STATUS_SHIFT; + rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> + I40E_RXD_QW1_STATUS_SHIFT; while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) { union i40e_rx_desc *next_rxd; @@ -916,18 +1372,20 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) skb = rx_bi->skb; prefetch(skb->data); - rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) - >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; - rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) - >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; - rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) - >> I40E_RXD_QW1_LENGTH_SPH_SHIFT; + rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> + I40E_RXD_QW1_LENGTH_HBUF_SHIFT; + rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> + I40E_RXD_QW1_LENGTH_SPH_SHIFT; - rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) - >> I40E_RXD_QW1_ERROR_SHIFT; + rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> + I40E_RXD_QW1_ERROR_SHIFT; rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; rx_bi->skb = NULL; /* This memory barrier is needed to keep us from reading @@ -1004,17 +1462,29 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { dev_kfree_skb_any(skb); + /* TODO: shouldn't we increment a counter indicating the + * drop? + */ goto next_desc; } - skb->rxhash = i40e_rx_hash(rx_ring, rx_desc); - i40e_rx_checksum(vsi, skb, rx_status, rx_error); + skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), + i40e_ptype_to_hash(rx_ptype)); + if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) { + i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status & + I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> + I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT); + rx_ring->last_rx_timestamp = jiffies; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; total_rx_packets++; skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); + vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; @@ -1037,13 +1507,15 @@ next_desc: /* use prefetched values */ rx_desc = next_rxd; qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) - >> I40E_RXD_QW1_STATUS_SHIFT; + rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> + I40E_RXD_QW1_STATUS_SHIFT; } rx_ring->next_to_clean = i; - rx_ring->rx_stats.packets += total_rx_packets; - rx_ring->rx_stats.bytes += total_rx_bytes; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; @@ -1067,27 +1539,28 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) struct i40e_q_vector *q_vector = container_of(napi, struct i40e_q_vector, napi); struct i40e_vsi *vsi = q_vector->vsi; + struct i40e_ring *ring; bool clean_complete = true; int budget_per_ring; - int i; if (test_bit(__I40E_DOWN, &vsi->state)) { napi_complete(napi); return 0; } + /* Since the actual Tx work is minimal, we can give the Tx a larger + * budget and be more aggressive about cleaning up the Tx descriptors. + */ + i40e_for_each_ring(ring, q_vector->tx) + clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + /* We attempt to distribute budget to each Rx queue fairly, but don't * allow the budget to go below 1 because that would exit polling early. - * Since the actual Tx work is minimal, we can give the Tx a larger - * budget and be more aggressive about cleaning up the Tx descriptors. */ budget_per_ring = max(budget/q_vector->num_ringpairs, 1); - for (i = 0; i < q_vector->num_ringpairs; i++) { - clean_complete &= i40e_clean_tx_irq(q_vector->tx.ring[i], - vsi->work_limit); - clean_complete &= i40e_clean_rx_irq(q_vector->rx.ring[i], - budget_per_ring); - } + + i40e_for_each_ring(ring, q_vector->rx) + clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring); /* If work not completed, return budget and polling will return */ if (!clean_complete) @@ -1117,7 +1590,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) qval = rd32(hw, I40E_QINT_TQCTL(0)); qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK; wr32(hw, I40E_QINT_TQCTL(0), qval); - i40e_flush(hw); + + i40e_irq_dynamic_enable_icr0(vsi->back); } } @@ -1144,17 +1618,16 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + u16 i; /* make sure ATR is enabled */ - if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED)) + if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) return; /* if sampling is disabled do nothing */ if (!tx_ring->atr_sample_rate) return; - tx_ring->atr_count++; - /* snag network header to get L4 type and address */ hdr.network = skb_network_header(skb); @@ -1176,17 +1649,27 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, th = (struct tcphdr *)(hdr.network + hlen); - /* sample on all syn/fin packets or once every atr sample rate */ - if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate)) + /* Due to lack of space, no more new filters can be programmed */ + if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + return; + + tx_ring->atr_count++; + + /* sample on all syn/fin/rst packets or once every atr sample rate */ + if (!th->fin && + !th->syn && + !th->rst && + (tx_ring->atr_count < tx_ring->atr_sample_rate)) return; tx_ring->atr_count = 0; /* grab the next descriptor */ - fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use); - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; @@ -1200,7 +1683,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; - dtype_cmd |= th->fin ? + dtype_cmd |= (th->fin || th->rst) ? (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << I40E_TXD_FLTR_QW1_PCMD_SHIFT) : (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << @@ -1212,11 +1695,15 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= + ((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); } -#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) /** * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW * @skb: send buffer @@ -1241,7 +1728,7 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ - } else if (protocol == __constant_htons(ETH_P_8021Q)) { + } else if (protocol == htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr, _vhdr; vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); if (!vhdr) @@ -1261,9 +1748,11 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, I40E_TX_FLAGS_VLAN_PRIO_SHIFT; if (tx_flags & I40E_TX_FLAGS_SW_VLAN) { struct vlan_ethhdr *vhdr; - if (skb_header_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; + int rc; + + rc = skb_cow_head(skb, 0); + if (rc < 0) + return rc; vhdr = (struct vlan_ethhdr *)skb->data; vhdr->h_vlan_TCI = htons(tx_flags >> I40E_TX_FLAGS_VLAN_SHIFT); @@ -1276,27 +1765,6 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, } /** - * i40e_tx_csum - is checksum offload requested - * @tx_ring: ptr to the ring to send - * @skb: ptr to the skb we're sending - * @tx_flags: the collected send information - * @protocol: the send protocol - * - * Returns true if checksum offload is requested - **/ -static bool i40e_tx_csum(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) -{ - if ((skb->ip_summed != CHECKSUM_PARTIAL) && - !(tx_flags & I40E_TX_FLAGS_TXSW)) { - if (!(tx_flags & I40E_TX_FLAGS_HW_VLAN)) - return false; - } - - return skb->ip_summed == CHECKSUM_PARTIAL; -} - -/** * i40e_tso - set up the tso context descriptor * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending @@ -1312,22 +1780,20 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) { u32 cd_cmd, cd_tso_len, cd_mss; + struct ipv6hdr *ipv6h; struct tcphdr *tcph; struct iphdr *iph; u32 l4len; int err; - struct ipv6hdr *ipv6h; if (!skb_is_gso(skb)) return 0; - if (skb_header_cloned(skb)) { - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } + err = skb_cow_head(skb, 0); + if (err < 0) + return err; - if (protocol == __constant_htons(ETH_P_IP)) { + if (protocol == htons(ETH_P_IP)) { iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); iph->tot_len = 0; @@ -1353,10 +1819,47 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) - | ((u64)cd_tso_len - << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) - | ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + ((u64)cd_tso_len << + I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + return 1; +} + +/** + * i40e_tsyn - set up the tsyn context descriptor + * @tx_ring: ptr to the ring to send + * @skb: ptr to the skb we're sending + * @tx_flags: the collected send information + * + * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen + **/ +static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, u64 *cd_type_cmd_tso_mss) +{ + struct i40e_pf *pf; + + if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) + return 0; + + /* Tx timestamps cannot be sampled when doing TSO */ + if (tx_flags & I40E_TX_FLAGS_TSO) + return 0; + + /* only timestamp the outbound packet if the user has requested it and + * we are not already transmitting a packet to be timestamped + */ + pf = i40e_netdev_to_pf(tx_ring->netdev); + if (pf->ptp_tx && !pf->ptp_tx_skb) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + pf->ptp_tx_skb = skb_get(skb); + } else { + return 0; + } + + *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN << + I40E_TXD_CTX_QW1_CMD_SHIFT; + return 1; } @@ -1482,15 +1985,17 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, const u32 cd_tunneling, const u32 cd_l2tag2) { struct i40e_tx_context_desc *context_desc; + int i = tx_ring->next_to_use; - if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2) + if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && + !cd_tunneling && !cd_l2tag2) return; /* grab the next descriptor */ - context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use); - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + context_desc = I40E_TX_CTXTDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; /* cpu_to_le32 and assign to struct fields */ context_desc->tunneling_params = cpu_to_le32(cd_tunneling); @@ -1512,68 +2017,71 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; unsigned int data_len = skb->data_len; unsigned int size = skb_headlen(skb); - struct device *dev = tx_ring->dev; - u32 paylen = skb->len - hdr_len; - u16 i = tx_ring->next_to_use; + struct skb_frag_struct *frag; struct i40e_tx_buffer *tx_bi; struct i40e_tx_desc *tx_desc; - u32 buf_offset = 0; + u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; - if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> I40E_TX_FLAGS_VLAN_SHIFT; } + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) + gso_segs = skb_shinfo(skb)->gso_segs; + else + gso_segs = 1; + + /* multiply data chunks by size of headers */ + first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); + first->gso_segs = gso_segs; + first->skb = skb; + first->tx_flags = tx_flags; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + tx_desc = I40E_TX_DESC(tx_ring, i); - for (;;) { - while (size > I40E_MAX_DATA_PER_TXD) { - tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset); + tx_bi = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + tx_desc->buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, I40E_MAX_DATA_PER_TXD, td_tag); - buf_offset += I40E_MAX_DATA_PER_TXD; - size -= I40E_MAX_DATA_PER_TXD; - tx_desc++; i++; if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; } - } - tx_bi = &tx_ring->tx_bi[i]; - tx_bi->length = buf_offset + size; - tx_bi->tx_flags = tx_flags; - tx_bi->dma = dma; + dma += I40E_MAX_DATA_PER_TXD; + size -= I40E_MAX_DATA_PER_TXD; - tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset); - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); + tx_desc->buffer_addr = cpu_to_le64(dma); + } if (likely(!data_len)) break; - size = skb_frag_size(frag); - data_len -= size; - buf_offset = 0; - tx_flags |= I40E_TX_FLAGS_MAPPED_AS_PAGE; - - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, + size, td_tag); tx_desc++; i++; @@ -1582,31 +2090,39 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, i = 0; } - frag++; - } - - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); + size = skb_frag_size(frag); + data_len -= size; - i++; - if (i == tx_ring->count) - i = 0; + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); - tx_ring->next_to_use = i; + tx_bi = &tx_ring->tx_bi[i]; + } - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) - gso_segs = skb_shinfo(skb)->gso_segs; - else - gso_segs = 1; + /* Place RS bit on last descriptor of any packet that spans across the + * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. + */ +#define WB_STRIDE 0x3 + if (((i & WB_STRIDE) != WB_STRIDE) && + (first <= &tx_ring->tx_bi[i]) && + (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << + I40E_TXD_QW1_CMD_SHIFT); + } else { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)I40E_TXD_CMD << + I40E_TXD_QW1_CMD_SHIFT); + } - /* multiply data chunks by size of headers */ - tx_bi->bytecount = paylen + (gso_segs * hdr_len); - tx_bi->gso_segs = gso_segs; - tx_bi->skb = skb; + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); - /* set the timestamp and next to watch values */ + /* set the timestamp */ first->time_stamp = jiffies; - first->next_to_watch = tx_desc; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1615,16 +2131,27 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, */ wmb(); + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + /* notify HW of packet */ writel(i, tx_ring->tail); + return; dma_error: - dev_info(dev, "TX DMA map failed\n"); + dev_info(tx_ring->dev, "TX DMA map failed\n"); /* clear dma mappings for failed tx_bi map */ for (;;) { tx_bi = &tx_ring->tx_bi[i]; - i40e_unmap_tx_resource(tx_ring, tx_bi); + i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); if (tx_bi == first) break; if (i == 0) @@ -1632,8 +2159,6 @@ dma_error: i--; } - dev_kfree_skb_any(skb); - tx_ring->next_to_use = i; } @@ -1647,6 +2172,7 @@ dma_error: static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + /* Memory barrier before checking head and tail */ smp_mb(); /* Check again in a case another CPU has just made room available. */ @@ -1685,25 +2211,20 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) static int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring) { -#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD unsigned int f; -#endif int count = 0; /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 2 desc gap to keep tail from touching head, + * + 4 desc gap to avoid the cache line where head is, * + 1 desc for context descriptor, * otherwise try next time */ -#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); -#else - count += skb_shinfo(skb)->nr_frags; -#endif + count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 3)) { + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { tx_ring->tx_stats.tx_busy++; return 0; } @@ -1728,6 +2249,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tsyn; int tso; if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -1743,9 +2265,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, first = &tx_ring->tx_bi[tx_ring->next_to_use]; /* setup IPv4/IPv6 offloads */ - if (protocol == __constant_htons(ETH_P_IP)) + if (protocol == htons(ETH_P_IP)) tx_flags |= I40E_TX_FLAGS_IPV4; - else if (protocol == __constant_htons(ETH_P_IPV6)) + else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, @@ -1758,16 +2280,21 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); - /* Always offload the checksum, since it's in the data descriptor */ - if (i40e_tx_csum(tx_ring, skb, tx_flags, protocol)) - tx_flags |= I40E_TX_FLAGS_CSUM; + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); + + if (tsyn) + tx_flags |= I40E_TX_FLAGS_TSYN; - /* always enable offload insertion */ + /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - if (tx_flags & I40E_TX_FLAGS_CSUM) + /* Always offload the checksum, since it's in the data descriptor */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tx_flags |= I40E_TX_FLAGS_CSUM; + i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, tx_ring, &cd_tunneling); + } i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); @@ -1801,7 +2328,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_ring *tx_ring = &vsi->tx_rings[skb->queue_mapping]; + struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; /* hardware can't handle really short frames, hardware padding works * beyond this point |
