diff options
author | Shirley Ma <mashirle@us.ibm.com> | 2011-07-06 12:22:12 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-07-07 04:41:13 -0700 |
commit | a6686f2f382b13f8a7253401a66690c3633b6a74 (patch) | |
tree | 36a9a4546163a3c5b9a9ab3daa3b6bbe34af55aa /net/core/skbuff.c | |
parent | 1cdebb423202e255366a321814fc6df079802a0d (diff) |
skbuff: skb supports zero-copy buffers
This patch adds userspace buffer support to the skb shared info. A new
struct ubuf_info is needed to maintain the userspace buffers'
argument and index; a callback is used to notify userspace to release
the buffers once the lower device has finished DMA (i.e. the last reference
to that skb has gone).
If any userspace apps reference these userspace buffers,
then these userspace buffers will be copied into the kernel. This way we
can prevent userspace apps from holding these userspace buffers too long.
Use destructor_arg to point to the userspace buffer info; a new tx flag,
SKBTX_DEV_ZEROCOPY, is added for the zero-copy buffer check.
Signed-off-by: Shirley Ma <xma@...ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 80 |
1 files changed, 79 insertions, 1 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 46cbd28f40f..a9577a2f3a4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb) put_page(skb_shinfo(skb)->frags[i].page); } + /* + * If skb buf is from userspace, we need to notify the caller + * the lower device DMA has done; + */ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg); + } + if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size) if (irqs_disabled()) return false; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) + return false; + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) return false; @@ -596,6 +611,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); +/* skb frags copy userspace buffers to kernel */ +static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +{ + int i; + int num_frags = skb_shinfo(skb)->nr_frags; + struct page *page, *head = NULL; + struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; + + for (i = 0; i < num_frags; i++) { + u8 *vaddr; + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + + page = alloc_page(GFP_ATOMIC); + if (!page) { + while (head) { + struct page *next = (struct page *)head->private; + put_page(head); + head = next; + } + return -ENOMEM; + } + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); + memcpy(page_address(page), + vaddr + f->page_offset, f->size); + kunmap_skb_frag(vaddr); + page->private = (unsigned long)head; + head = page; + } + + /* skb frags release userspace buffers */ + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + put_page(skb_shinfo(skb)->frags[i].page); + + uarg->callback(uarg); + + /* skb frags point to kernel buffers */ + for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { + 
skb_shinfo(skb)->frags[i - 1].page_offset = 0; + skb_shinfo(skb)->frags[i - 1].page = head; + head = (struct page *)head->private; + } + return 0; +} + + /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -614,6 +674,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) { struct sk_buff *n; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + return NULL; + } + n = skb + 1; if (skb->fclone == SKB_FCLONE_ORIG && n->fclone == SKB_FCLONE_UNAVAILABLE) { @@ -731,6 +796,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->nr_frags) { int i; + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) { + kfree(n); + goto out; + } + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; get_page(skb_shinfo(n)->frags[i].page); @@ -788,7 +859,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, fastpath = true; else { int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; - fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta; } @@ -819,6 +889,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (fastpath) { kfree(skb->head); } else { + /* copy this zero copy skb frags */ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, gfp_mask)) + goto nofrags; + } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -853,6 +928,8 @@ adjust_others: atomic_set(&skb_shinfo(skb)->dataref, 1); return 0; +nofrags: + kfree(data); nodata: return -ENOMEM; } @@ -1354,6 +1431,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) } start = end; } + if (!len) return 0; |