Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
| -rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_txrx.c | 692 |
1 file changed, 603 insertions, 89 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f1f03bc5c72..e49f31dbd5d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 Intel Corporation. + * Copyright(c) 2013 - 2014 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -12,9 +12,8 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU General Public License along + * with this program. If not, see <http://www.gnu.org/licenses/>. * * The full GNU General Public License is included in this distribution in * the file called "COPYING". @@ -25,7 +24,9 @@ * ******************************************************************************/ +#include <linux/prefetch.h> #include "i40e.h" +#include "i40e_prototype.h" static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, u32 td_tag) @@ -40,11 +41,12 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) /** * i40e_program_fdir_filter - Program a Flow Director filter - * @fdir_input: Packet data that will be filter parameters + * @fdir_data: Packet data that will be filter parameters + * @raw_packet: the pre-allocated packet buffer for FDir * @pf: The pf pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, +int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, struct i40e_pf *pf, bool add) { struct i40e_filter_program_desc *fdir_desc; @@ -60,7 +62,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, /* find existing FDIR VSI */ vsi = NULL; - for (i = 0; i < pf->hw.func_caps.num_vsis; i++) + for (i = 0; i < pf->num_alloc_vsi; i++) if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) vsi = pf->vsi[i]; if (!vsi) @@ -69,15 +71,14 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, tx_ring = vsi->tx_rings[0]; dev = tx_ring->dev; - dma = dma_map_single(dev, fdir_data->raw_packet, - I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE); + dma = dma_map_single(dev, raw_packet, + I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto dma_fail; /* grab the next descriptor */ i = tx_ring->next_to_use; fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); - tx_buf = &tx_ring->tx_bi[i]; tx_ring->next_to_use = (i + 1 < tx_ring->count) ? 
i + 1 : 0; @@ -120,7 +121,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; dcc |= ((u32)fdir_data->cnt_index << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; } fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); @@ -129,14 +130,22 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, /* Now program a dummy descriptor */ i = tx_ring->next_to_use; tx_desc = I40E_TX_DESC(tx_ring, i); + tx_buf = &tx_ring->tx_bi[i]; tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0; + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE); + dma_unmap_addr_set(tx_buf, dma, dma); + tx_desc->buffer_addr = cpu_to_le64(dma); td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY; tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0); + build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0); + + /* set the timestamp */ + tx_buf->time_stamp = jiffies; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -155,26 +164,322 @@ dma_fail: return -1; } +#define IP_HEADER_OFFSET 14 +#define I40E_UDPIP_DUMMY_PACKET_LEN 42 +/** + * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct udphdr *udp; + struct iphdr *ip; + bool err = false; + int ret; + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN); + + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET + + sizeof(struct iphdr)); + + ip->daddr = fd_data->dst_ip[0]; + udp->dest = fd_data->dst_port; + ip->saddr = fd_data->src_ip[0]; + udp->source = fd_data->src_port; + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, + "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + return err ? 
-EOPNOTSUPP : 0; +} + +#define I40E_TCPIP_DUMMY_PACKET_LEN 54 +/** + * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct tcphdr *tcp; + struct iphdr *ip; + bool err = false; + int ret; + /* Dummy packet */ + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11, + 0x0, 0x72, 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN); + + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET + + sizeof(struct iphdr)); + + ip->daddr = fd_data->dst_ip[0]; + tcp->dest = fd_data->dst_port; + ip->saddr = fd_data->src_ip[0]; + tcp->source = fd_data->src_port; + + if (add) { + if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + } + } + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + + return err ? 
-EOPNOTSUPP : 0; +} + +/** + * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for + * a specific flow spec + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Always returns -EOPNOTSUPP + **/ +static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + return -EOPNOTSUPP; +} + +#define I40E_IP_DUMMY_PACKET_LEN 34 +/** + * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for + * a specific flow spec + * @vsi: pointer to the targeted VSI + * @fd_data: the flow director data required for the FDir descriptor + * @raw_packet: the pre-allocated packet buffer for FDir + * @add: true adds a filter, false removes it + * + * Returns 0 if the filters were successfully added or removed + **/ +static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi, + struct i40e_fdir_filter *fd_data, + u8 *raw_packet, bool add) +{ + struct i40e_pf *pf = vsi->back; + struct iphdr *ip; + bool err = false; + int ret; + int i; + static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0, + 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0}; + + memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN); + ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET); + + ip->saddr = fd_data->src_ip[0]; + ip->daddr = fd_data->dst_ip[0]; + ip->protocol = 0; + + for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; + i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) { + fd_data->pctype = i; + ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add); + + if (ret) { + dev_info(&pf->pdev->dev, + "Filter command send failed for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + err = true; + } else { + dev_info(&pf->pdev->dev, + "Filter OK for PCTYPE %d (ret = %d)\n", + fd_data->pctype, ret); + } + } + + return err ? 
-EOPNOTSUPP : 0; +} + +/** + * i40e_add_del_fdir - Build raw packets to add/del fdir filter + * @vsi: pointer to the targeted VSI + * @cmd: command to get or set RX flow classification rules + * @add: true adds a filter, false removes it + * + **/ +int i40e_add_del_fdir(struct i40e_vsi *vsi, + struct i40e_fdir_filter *input, bool add) +{ + struct i40e_pf *pf = vsi->back; + u8 *raw_packet; + int ret; + + /* Populate the Flow Director that we have at the moment + * and allocate the raw packet buffer for the calling functions + */ + raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL); + if (!raw_packet) + return -ENOMEM; + + switch (input->flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + ret = i40e_add_del_fdir_tcpv4(vsi, input, raw_packet, + add); + break; + case UDP_V4_FLOW: + ret = i40e_add_del_fdir_udpv4(vsi, input, raw_packet, + add); + break; + case SCTP_V4_FLOW: + ret = i40e_add_del_fdir_sctpv4(vsi, input, raw_packet, + add); + break; + case IPV4_FLOW: + ret = i40e_add_del_fdir_ipv4(vsi, input, raw_packet, + add); + break; + case IP_USER_FLOW: + switch (input->ip4_proto) { + case IPPROTO_TCP: + ret = i40e_add_del_fdir_tcpv4(vsi, input, + raw_packet, add); + break; + case IPPROTO_UDP: + ret = i40e_add_del_fdir_udpv4(vsi, input, + raw_packet, add); + break; + case IPPROTO_SCTP: + ret = i40e_add_del_fdir_sctpv4(vsi, input, + raw_packet, add); + break; + default: + ret = i40e_add_del_fdir_ipv4(vsi, input, + raw_packet, add); + break; + } + break; + default: + dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", + input->flow_type); + ret = -EINVAL; + } + + kfree(raw_packet); + return ret; +} + /** * i40e_fd_handle_status - check the Programming Status for FD * @rx_ring: the Rx ring for this descriptor - * @qw: the descriptor data + * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor. * @prog_id: the id originally used for programming * * This is used to verify if the FD programming or invalidation * requested by SW to the HW is successful or not and take actions accordingly. **/ -static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id) +static void i40e_fd_handle_status(struct i40e_ring *rx_ring, + union i40e_rx_desc *rx_desc, u8 prog_id) { - struct pci_dev *pdev = rx_ring->vsi->back->pdev; + struct i40e_pf *pf = rx_ring->vsi->back; + struct pci_dev *pdev = pf->pdev; + u32 fcnt_prog, fcnt_avail; u32 error; + u64 qw; + qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len); error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; - /* for now just print the Status */ - dev_info(&pdev->dev, "FD programming id %02x, Status %08x\n", - prog_id, error); + if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) { + dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n", + rx_desc->wb.qword0.hi_dword.fd_id); + + /* filter programming failed most likely due to table full */ + fcnt_prog = i40e_get_current_fd_count(pf); + fcnt_avail = i40e_get_fd_cnt_all(pf); + /* If ATR is running fcnt_prog can quickly change, + * if we are very close to full, it makes sense to disable + * FD ATR/SB and then re-enable it when there is room. 
+ */ + if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) { + /* Turn off ATR first */ + if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { + pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + dev_warn(&pdev->dev, "FD filter space full, ATR for further flows will be turned off\n"); + pf->auto_disable_flags |= + I40E_FLAG_FD_ATR_ENABLED; + pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT; + } else if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n"); + pf->auto_disable_flags |= + I40E_FLAG_FD_SB_ENABLED; + pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT; + } + } else { + dev_info(&pdev->dev, "FD filter programming error\n"); + } + } else if (error == + (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) { + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pdev->dev, "ntuple filter loc = %d, could not be removed\n", + rx_desc->wb.qword0.hi_dword.fd_id); + } } /** @@ -309,6 +614,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_clean_tx_irq - Reclaim resources after transmit completes * @tx_ring: tx ring to clean * @budget: how many cleans we're allowed @@ -319,6 +638,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) { u16 i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; + struct i40e_tx_desc *tx_head; struct i40e_tx_desc *tx_desc; unsigned int total_packets = 0; unsigned int total_bytes = 0; @@ -327,6 +647,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_desc = I40E_TX_DESC(tx_ring, i); i -= tx_ring->count; + tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); + do { struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; @@ -337,9 +659,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) /* prevent any other reads prior to eop_desc */ read_barrier_depends(); - /* if the descriptor isn't done, no work yet to do */ - if (!(eop_desc->cmd_type_offset_bsz & - cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) + /* we have caught up to head, no work left to do */ + if (tx_head == tx_desc) break; /* clear next_to_watch to prevent false hangs */ @@ -571,7 +892,7 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring, I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT; if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS) - i40e_fd_handle_status(rx_ring, qw, id); + i40e_fd_handle_status(rx_ring, rx_desc, id); } /** @@ -595,6 +916,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring) /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); + /* add u32 for head writeback, align after this takes care of + * guaranteeing this is at least one cache line in size + */ + tx_ring->size += sizeof(u32); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); @@ -768,7 +1093,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) skb = netdev_alloc_skb_ip_align(rx_ring->netdev, rx_ring->rx_buf_len); if (!skb) { - rx_ring->rx_stats.alloc_rx_buff_failed++; + 
rx_ring->rx_stats.alloc_buff_failed++; goto no_buffers; } /* initialize queue mapping */ @@ -782,7 +1107,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) rx_ring->rx_buf_len, DMA_FROM_DEVICE); if (dma_mapping_error(rx_ring->dev, bi->dma)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_ring->rx_stats.alloc_buff_failed++; bi->dma = 0; goto no_buffers; } @@ -792,7 +1117,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) if (!bi->page) { bi->page = alloc_page(GFP_ATOMIC); if (!bi->page) { - rx_ring->rx_stats.alloc_rx_page_failed++; + rx_ring->rx_stats.alloc_page_failed++; goto no_buffers; } } @@ -807,7 +1132,7 @@ void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) DMA_FROM_DEVICE); if (dma_mapping_error(rx_ring->dev, bi->page_dma)) { - rx_ring->rx_stats.alloc_rx_page_failed++; + rx_ring->rx_stats.alloc_page_failed++; bi->page_dma = 0; goto no_buffers; } @@ -860,27 +1185,107 @@ static void i40e_receive_skb(struct i40e_ring *rx_ring, * @skb: skb currently being received and modified * @rx_status: status value of last descriptor in packet * @rx_error: error value of last descriptor in packet + * @rx_ptype: ptype value of last descriptor in packet **/ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, u32 rx_status, - u32 rx_error) + u32 rx_error, + u16 rx_ptype) { + struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype); + bool ipv4 = false, ipv6 = false; + bool ipv4_tunnel, ipv6_tunnel; + __wsum rx_udp_csum; + struct iphdr *iph; + __sum16 csum; + + ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) && + (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4); + ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) && + (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4); + + skb->encapsulation = ipv4_tunnel || ipv6_tunnel; skb->ip_summed = CHECKSUM_NONE; /* Rx csum enabled and ip headers found? */ - if (!(vsi->netdev->features & NETIF_F_RXCSUM && - rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) + if (!(vsi->netdev->features & NETIF_F_RXCSUM)) + return; + + /* did the hardware decode the packet and checksum? */ + if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT))) return; - /* IP or L4 checksum error */ - if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | - (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) { - vsi->back->hw_csum_rx_error++; + /* both known and outer_ip must be set for the below code to work */ + if (!(decoded.known && decoded.outer_ip)) return; + + if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) + ipv4 = true; + else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) + ipv6 = true; + + if (ipv4 && + (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) | + (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))) + goto checksum_fail; + + /* likely incorrect csum if alternate IP extension headers found */ + if (ipv6 && + decoded.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP && + rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT) && + rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) + /* don't increment checksum err here, non-fatal err */ + return; + + /* there was some L4 error, count error and punt packet to the stack */ + if (rx_error & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) + goto checksum_fail; + + /* handle packets that were not able to be checksummed due + * to arrival speed, in this case the stack can compute + * the csum. 
+ */ + if (rx_error & (1 << I40E_RX_DESC_ERROR_PPRS_SHIFT)) + return; + + /* If VXLAN traffic has an outer UDPv4 checksum we need to check + * it in the driver, hardware does not do it for us. + * Since L3L4P bit was set we assume a valid IHL value (>=5) + * so the total length of IPv4 header is IHL*4 bytes + * The UDP_0 bit *may* bet set if the *inner* header is UDP + */ + if (ipv4_tunnel && + (decoded.inner_prot != I40E_RX_PTYPE_INNER_PROT_UDP) && + !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) { + skb->transport_header = skb->mac_header + + sizeof(struct ethhdr) + + (ip_hdr(skb)->ihl * 4); + + /* Add 4 bytes for VLAN tagged packets */ + skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)) + ? VLAN_HLEN : 0; + + rx_udp_csum = udp_csum(skb); + iph = ip_hdr(skb); + csum = csum_tcpudp_magic( + iph->saddr, iph->daddr, + (skb->len - skb_transport_offset(skb)), + IPPROTO_UDP, rx_udp_csum); + + if (udp_hdr(skb)->check != csum) + goto checksum_fail; } skb->ip_summed = CHECKSUM_UNNECESSARY; + + return; + +checksum_fail: + vsi->back->hw_csum_rx_error++; } /** @@ -891,13 +1296,38 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, static inline u32 i40e_rx_hash(struct i40e_ring *ring, union i40e_rx_desc *rx_desc) { - if (ring->netdev->features & NETIF_F_RXHASH) { - if ((le64_to_cpu(rx_desc->wb.qword1.status_error_len) >> - I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) & - I40E_RX_DESC_FLTSTAT_RSS_HASH) - return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); - } - return 0; + const __le64 rss_mask = + cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH << + I40E_RX_DESC_STATUS_FLTSTAT_SHIFT); + + if ((ring->netdev->features & NETIF_F_RXHASH) && + (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) + return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss); + else + return 0; +} + +/** + * i40e_ptype_to_hash - get a hash type + * @ptype: the ptype value from the descriptor + * + * Returns a hash type to be used by skb_set_hash + **/ +static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype) +{ + struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype); + + if (!decoded.known) + return PKT_HASH_TYPE_NONE; + + if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4) + return PKT_HASH_TYPE_L4; + else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP && + decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3) + return PKT_HASH_TYPE_L3; + else + return PKT_HASH_TYPE_L2; } /** @@ -917,12 +1347,16 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) u16 i = rx_ring->next_to_clean; union i40e_rx_desc *rx_desc; u32 rx_error, rx_status; + u8 rx_ptype; u64 qword; + if (budget <= 0) + return 0; + rx_desc = I40E_RX_DESC(rx_ring, i); qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) - >> I40E_RXD_QW1_STATUS_SHIFT; + rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> + I40E_RXD_QW1_STATUS_SHIFT; while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) { union i40e_rx_desc *next_rxd; @@ -938,18 +1372,20 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) skb = rx_bi->skb; prefetch(skb->data); - rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) - >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT; - rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) - >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT; - rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) - >> I40E_RXD_QW1_LENGTH_SPH_SHIFT; + rx_packet_len = (qword & 
I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >> + I40E_RXD_QW1_LENGTH_HBUF_SHIFT; + rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >> + I40E_RXD_QW1_LENGTH_SPH_SHIFT; - rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) - >> I40E_RXD_QW1_ERROR_SHIFT; + rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >> + I40E_RXD_QW1_ERROR_SHIFT; rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT); rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT); + rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> + I40E_RXD_QW1_PTYPE_SHIFT; rx_bi->skb = NULL; /* This memory barrier is needed to keep us from reading @@ -1026,17 +1462,29 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* ERR_MASK will only have valid bits if EOP set */ if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) { dev_kfree_skb_any(skb); + /* TODO: shouldn't we increment a counter indicating the + * drop? + */ goto next_desc; } - skb->rxhash = i40e_rx_hash(rx_ring, rx_desc); - i40e_rx_checksum(vsi, skb, rx_status, rx_error); + skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc), + i40e_ptype_to_hash(rx_ptype)); + if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) { + i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status & + I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >> + I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT); + rx_ring->last_rx_timestamp = jiffies; + } /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; total_rx_packets++; skb->protocol = eth_type_trans(skb, rx_ring->netdev); + + i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype); + vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; @@ -1059,8 +1507,8 @@ next_desc: /* use prefetched values */ rx_desc = next_rxd; qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); - rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) - >> I40E_RXD_QW1_STATUS_SHIFT; + rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >> + I40E_RXD_QW1_STATUS_SHIFT; } rx_ring->next_to_clean = i; @@ -1173,15 +1621,13 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i; /* make sure ATR is enabled */ - if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED)) + if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) return; /* if sampling is disabled do nothing */ if (!tx_ring->atr_sample_rate) return; - tx_ring->atr_count++; - /* snag network header to get L4 type and address */ hdr.network = skb_network_header(skb); @@ -1203,8 +1649,17 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, th = (struct tcphdr *)(hdr.network + hlen); - /* sample on all syn/fin packets or once every atr sample rate */ - if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate)) + /* Due to lack of space, no more new filters can be programmed */ + if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + return; + + tx_ring->atr_count++; + + /* sample on all syn/fin/rst packets or once every atr sample rate */ + if (!th->fin && + !th->syn && + !th->rst && + (tx_ring->atr_count < tx_ring->atr_sample_rate)) return; tx_ring->atr_count = 0; @@ -1228,7 +1683,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; - dtype_cmd |= th->fin ? + dtype_cmd |= (th->fin || th->rst) ? 
(I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << I40E_TXD_FLTR_QW1_PCMD_SHIFT) : (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << @@ -1240,6 +1695,11 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= + ((u32)pf->fd_atr_cnt_idx << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & + I40E_TXD_FLTR_QW1_CNTINDEX_MASK; + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); } @@ -1268,7 +1728,7 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ - } else if (protocol == __constant_htons(ETH_P_8021Q)) { + } else if (protocol == htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr, _vhdr; vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); if (!vhdr) @@ -1288,9 +1748,11 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, I40E_TX_FLAGS_VLAN_PRIO_SHIFT; if (tx_flags & I40E_TX_FLAGS_SW_VLAN) { struct vlan_ethhdr *vhdr; - if (skb_header_cloned(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; + int rc; + + rc = skb_cow_head(skb, 0); + if (rc < 0) + return rc; vhdr = (struct vlan_ethhdr *)skb->data; vhdr->h_vlan_TCI = htons(tx_flags >> I40E_TX_FLAGS_VLAN_SHIFT); @@ -1318,22 +1780,20 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling) { u32 cd_cmd, cd_tso_len, cd_mss; + struct ipv6hdr *ipv6h; struct tcphdr *tcph; struct iphdr *iph; u32 l4len; int err; - struct ipv6hdr *ipv6h; if (!skb_is_gso(skb)) return 0; - if (skb_header_cloned(skb)) { - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (err) - return err; - } + err = skb_cow_head(skb, 0); + if (err < 0) + return err; - if (protocol == __constant_htons(ETH_P_IP)) { + if (protocol == htons(ETH_P_IP)) { iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); tcph = skb->encapsulation ? 
inner_tcp_hdr(skb) : tcp_hdr(skb); iph->tot_len = 0; @@ -1359,10 +1819,47 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, cd_cmd = I40E_TX_CTX_DESC_TSO; cd_tso_len = skb->len - *hdr_len; cd_mss = skb_shinfo(skb)->gso_size; - *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) - | ((u64)cd_tso_len - << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) - | ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) | + ((u64)cd_tso_len << + I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | + ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT); + return 1; +} + +/** + * i40e_tsyn - set up the tsyn context descriptor + * @tx_ring: ptr to the ring to send + * @skb: ptr to the skb we're sending + * @tx_flags: the collected send information + * + * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen + **/ +static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, u64 *cd_type_cmd_tso_mss) +{ + struct i40e_pf *pf; + + if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) + return 0; + + /* Tx timestamps cannot be sampled when doing TSO */ + if (tx_flags & I40E_TX_FLAGS_TSO) + return 0; + + /* only timestamp the outbound packet if the user has requested it and + * we are not already transmitting a packet to be timestamped + */ + pf = i40e_netdev_to_pf(tx_ring->netdev); + if (pf->ptp_tx && !pf->ptp_tx_skb) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + pf->ptp_tx_skb = skb_get(skb); + } else { + return 0; + } + + *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN << + I40E_TXD_CTX_QW1_CMD_SHIFT; + return 1; } @@ -1490,7 +1987,8 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, struct i40e_tx_context_desc *context_desc; int i = tx_ring->next_to_use; - if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2) + if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) && + !cd_tunneling && !cd_l2tag2) return; /* grab the next descriptor */ @@ -1601,9 +2099,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_bi = &tx_ring->tx_bi[i]; } - tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, size, td_tag) | - cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); + /* Place RS bit on last descriptor of any packet that spans across the + * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline. + */ +#define WB_STRIDE 0x3 + if (((i & WB_STRIDE) != WB_STRIDE) && + (first <= &tx_ring->tx_bi[i]) && + (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP << + I40E_TXD_QW1_CMD_SHIFT); + } else { + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)I40E_TXD_CMD << + I40E_TXD_QW1_CMD_SHIFT); + } netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index), @@ -1660,6 +2172,7 @@ dma_error: static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); + /* Memory barrier before checking head and tail */ smp_mb(); /* Check again in a case another CPU has just made room available. 
*/ @@ -1698,25 +2211,20 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) static int i40e_xmit_descriptor_count(struct sk_buff *skb, struct i40e_ring *tx_ring) { -#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD unsigned int f; -#endif int count = 0; /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD, * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD, - * + 2 desc gap to keep tail from touching head, + * + 4 desc gap to avoid the cache line where head is, * + 1 desc for context descriptor, * otherwise try next time */ -#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); -#else - count += skb_shinfo(skb)->nr_frags; -#endif + count += TXD_USE_COUNT(skb_headlen(skb)); - if (i40e_maybe_stop_tx(tx_ring, count + 3)) { + if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) { tx_ring->tx_stats.tx_busy++; return 0; } @@ -1741,6 +2249,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, __be16 protocol; u32 td_cmd = 0; u8 hdr_len = 0; + int tsyn; int tso; if (0 == i40e_xmit_descriptor_count(skb, tx_ring)) return NETDEV_TX_BUSY; @@ -1756,9 +2265,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, first = &tx_ring->tx_bi[tx_ring->next_to_use]; /* setup IPv4/IPv6 offloads */ - if (protocol == __constant_htons(ETH_P_IP)) + if (protocol == htons(ETH_P_IP)) tx_flags |= I40E_TX_FLAGS_IPV4; - else if (protocol == __constant_htons(ETH_P_IPV6)) + else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len, @@ -1771,6 +2280,11 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); + tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss); + + if (tsyn) + tx_flags |= I40E_TX_FLAGS_TSYN; + /* always enable CRC insertion offload */ td_cmd |= I40E_TX_DESC_CMD_ICRC; |
