diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 212 | ||||
-rw-r--r-- | net/core/skbuff.c | 27 |
2 files changed, 151 insertions, 88 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 5379b0c1190..ec5be1c7f2f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,14 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) +enum { + GRO_MERGED, + GRO_MERGED_FREE, + GRO_HELD, + GRO_NORMAL, + GRO_DROP, +}; + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -207,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; } +static inline void *skb_gro_mac_header(struct sk_buff *skb) +{ + return skb_headlen(skb) ? skb_mac_header(skb) : + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; +} + /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -1708,56 +1723,26 @@ out_kfree_skb: return 0; } -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; +static u32 skb_tx_hashrnd; +static int skb_tx_hashrnd_initialized = 0; -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) { - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; - - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; - } + u32 hash; - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; + if (unlikely(!skb_tx_hashrnd_initialized)) { + get_random_bytes(&skb_tx_hashrnd, 4); + skb_tx_hashrnd_initialized = 1; } + if (skb_rx_queue_recorded(skb)) { + hash = skb_get_rx_queue(skb); + } else if (skb->sk && skb->sk->sk_hash) { + hash = skb->sk->sk_hash; + } else + hash = skb->protocol; - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } - - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + hash = jhash_1word(hash, skb_tx_hashrnd); return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1771,7 +1756,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); + queue_index = skb_tx_hash(dev, skb); skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); @@ -2372,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2390,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + offset; +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2399,7 +2402,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; - int free; + int ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2410,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; + void *mac; if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); + mac = skb_gro_mac_header(skb); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; @@ -2428,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) continue; if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) + memcmp(skb_mac_header(p), mac, mac_len)) NAPI_GRO_CB(p)->same_flow = 0; } @@ -2442,7 +2446,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { struct sk_buff *nskb = *pp; @@ -2456,21 +2460,20 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) goto normal; - } NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; + ret = GRO_HELD; ok: - return free; + return ret; normal: - return -1; + return GRO_NORMAL; } EXPORT_SYMBOL(dev_gro_receive); @@ -2486,18 +2489,32 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int napi_skb_finish(int ret, struct sk_buff *skb) { - switch (__napi_gro_receive(napi, skb)) { - case -1: + int err = NET_RX_SUCCESS; + + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 1: + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: kfree_skb(skb); break; } - return NET_RX_SUCCESS; + return err; +} +EXPORT_SYMBOL(napi_skb_finish); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + skb_gro_reset_offset(skb); + + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2515,6 +2532,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; + skb_frag_t *frag; + int i; napi->skb = NULL; @@ -2527,20 +2547,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + frag = &info->frags[info->nr_frags - 1]; + + for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + skb_fill_page_desc(skb, i, frag->page, frag->page_offset, + frag->size); + frag++; + } skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); skb->data_len = info->len; skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2550,29 +2586,47 @@ out: } EXPORT_SYMBOL(napi_fraginfo_skb); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; + int err = NET_RX_SUCCESS; + int may; - if (!skb) - goto out; + switch (ret) { + case GRO_NORMAL: + case GRO_HELD: + may = pskb_may_pull(skb, skb_gro_offset(skb)); + BUG_ON(!may); - err = NET_RX_SUCCESS; + skb->protocol = eth_type_trans(skb, napi->dev); - switch (__napi_gro_receive(napi, skb)) { - case -1: - return netif_receive_skb(skb); + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); - case 0: - goto out; - } + skb_gro_pull(skb, -ETH_HLEN); + break; - napi_reuse_skb(napi, skb); + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: + napi_reuse_skb(napi, skb); + break; + } -out: return err; } +EXPORT_SYMBOL(napi_frags_finish); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + + if (!skb) + return NET_RX_DROP; + + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); +} EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da74b844f4e..f20e758fe46 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2585,17 +2585,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p = *head; struct sk_buff *nskb; unsigned int headroom; - unsigned int hlen = p->data - skb_mac_header(p); - unsigned int len = skb->len; + unsigned int len = skb_gro_len(skb); - if (hlen + p->len + len >= 65536) + if (p->len + len >= 65536) return -E2BIG; if (skb_shinfo(p)->frag_list) goto merge; - else if (!skb_headlen(p) && !skb_headlen(skb) && - skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < - MAX_SKB_FRAGS) { + else if (skb_headlen(skb) <= skb_gro_offset(skb)) { + if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags > + MAX_SKB_FRAGS) + return -E2BIG; + + skb_shinfo(skb)->frags[0].page_offset += + skb_gro_offset(skb) - skb_headlen(skb); + skb_shinfo(skb)->frags[0].size -= + skb_gro_offset(skb) - skb_headlen(skb); + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -2612,7 +2618,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom); + nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); if (unlikely(!nskb)) return -ENOMEM; @@ -2620,12 +2626,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) nskb->mac_len = p->mac_len; skb_reserve(nskb, headroom); + __skb_put(nskb, skb_gro_offset(p)); - skb_set_mac_header(nskb, -hlen); + skb_set_mac_header(nskb, skb_mac_header(p) - p->data); skb_set_network_header(nskb, skb_network_offset(p)); skb_set_transport_header(nskb, skb_transport_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + __skb_pull(p, skb_gro_offset(p)); + memcpy(skb_mac_header(nskb), skb_mac_header(p), + p->data - skb_mac_header(p)); *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; |