Diffstat (limited to 'net/core')
 net/core/Makefile |   1 +
 net/core/dev.c    | 101 +++++++-
 net/core/macsec.c | 445 ++++++++++++++++++++++++++
 net/core/skbuff.c | 227 +++++++++++++-
 4 files changed, 769 insertions(+), 5 deletions(-)
diff --git a/net/core/Makefile b/net/core/Makefile
index 796f46eece5..618d0f3416e 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_HAS_DMA) += skb_dma_map.o
 
 obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
 	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+obj-$(CONFIG_NET_MACSEC) += macsec.o
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 74d0ccef22d..9b2e671c7b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -104,6 +104,7 @@
 #include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
+#include <net/macsec.h>
 #include <linux/highmem.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
@@ -1721,6 +1722,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+#ifdef CONFIG_NET_MACSEC
+		/*
+		 * Divert the frame to the MACsec engine.  -EINPROGRESS means
+		 * the crypto layer now owns the skb and will resubmit it to
+		 * the driver from its completion callback, so report success
+		 * to the stack; any other value falls through to plain
+		 * transmission.
+		 */
+		if (netdev_macsec_priv(dev)) {
+			rc = dev->macsec_output_hw(skb, dev);
+			if (rc == -EINPROGRESS)
+				return 0;
+		}
+#endif
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
@@ -2350,7 +2358,21 @@ ncls:
 	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
 	if (!skb)
 		goto out;
-
+
+#ifdef CONFIG_NET_MACSEC
+	/*
+	 * MACsec-tagged frames go to the decrypt path; a tagged frame on a
+	 * device without a MACsec session is dropped.
+	 */
+	if (macsec_type_trans(skb) == ETH_P_MACSEC) {
+		if (skb->dev->macsec_priv) {
+			ret = skb->dev->macsec_input_hw(skb);
+			if (ret == -EINPROGRESS) {
+				ret = 0;
+				goto out;
+			}
+		}
+		kfree_skb(skb);
+		ret = NET_RX_DROP;
+		goto out;
+	}
+#endif
 	type = skb->protocol;
 	list_for_each_entry_rcu(ptype,
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2379,6 +2401,55 @@ out:
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
+/*
+ * Deliver a decrypted frame to the protocol layers.  Called from the
+ * MACsec crypto completion callback instead of netif_receive_skb(),
+ * because the MACsec classification above must not run again.
+ */
+int macsec_netif_receive_skb(struct sk_buff *skb, __be16 type)
+{
+	struct packet_type *ptype, *pt_prev;
+	struct net_device *orig_dev;
+	struct net_device *null_or_orig;
+	int ret = NET_RX_DROP;
+
+	pt_prev = NULL;
+	null_or_orig = NULL;
+	orig_dev = skb->dev;
+	if (orig_dev->master) {
+		if (skb_bond_should_drop(skb))
+			null_or_orig = orig_dev; /* deliver only exact match */
+		else
+			skb->dev = orig_dev->master;
+	}
+
+	list_for_each_entry_rcu(ptype,
+			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
+		if (ptype->type == type &&
+		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+		     ptype->dev == orig_dev)) {
+			if (pt_prev)
+				ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = ptype;
+		}
+	}
+	if (pt_prev) {
+		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+	} else {
+		if (skb_shinfo(skb)->nr_frags)
+			printk(KERN_ERR "macsec: unexpected fragmented skb\n");
+		kfree_skb(skb);
+		ret = NET_RX_DROP;
+	}
+
+	return ret;
+}
+
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(void *arg)
 {
@@ -4328,6 +4399,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 	int err;
 	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
 	const struct net_device_ops *ops;
+#ifdef CONFIG_NET_MACSEC
+	void *mac_priv;
+#endif
 
 	if (!dev)
 		return -ENODEV;
@@ -4392,6 +4464,31 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
 		return dev_change_name(dev, ifr->ifr_newname);
 
+	case SIOCSETMACSEC:
+#ifdef CONFIG_NET_MACSEC
+		mac_priv = netdev_macsec_priv(dev);
+		if (mac_priv) {
+			printk(KERN_INFO "macsec: session already set\n");
+			return -EEXIST;
+		}
+		err = macsec_init_state(dev);
+		if (err)
+			return err;
+		/* make room for the 8-byte SecTAG behind the MAC header */
+		dev->hard_header_len = ETH_HLEN + 8;
+		return 0;
+#else
+		return -EINVAL;
+#endif
+
+	case SIOCUNSETMACSEC:
+#ifdef CONFIG_NET_MACSEC
+		macsec_destroy(dev);
+		dev->hard_header_len = ETH_HLEN;
+		return 0;
+#else
+		return -EINVAL;
+#endif
+
 	/*
 	 *	Unknown or private ioctl
 	 */
@@ -4550,6 +4647,8 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 	case SIOCSIFFLAGS:
 	case SIOCSIFMETRIC:
 	case SIOCSIFMTU:
+	case SIOCSETMACSEC:
+	case SIOCUNSETMACSEC:
 	case SIOCSIFMAP:
 	case SIOCSIFHWADDR:
 	case SIOCSIFSLAVE:
diff --git a/net/core/macsec.c b/net/core/macsec.c
new file mode 100644
index 00000000000..7c3984de39b
--- /dev/null
+++ b/net/core/macsec.c
@@ -0,0 +1,445 @@
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/if_ether.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <net/ip.h>
+#include <net/macsec.h>
+#include <crypto/aead.h>
+#include <crypto/authenc.h>
+
+#define MACSEC_SKB_CB(__skb) ((struct macsec_skb_cb *)&((__skb)->cb[0]))
+
+/* instrumentation counters, dumped via /proc/macsec */
+static int create_cnt;
+static u32 delete_cnt;
+static u32 create_opt_cnt;
+static int delete_opt_cnt;
+static int create_force_cnt;
+static int macsec_output;
+static int macsec_input;
+
+static int macsec_req_ctx_size(struct crypto_aead *aead, int sg_size)
+{
+	unsigned int len = 0;
+
+	len += sizeof(struct aead_request) + crypto_aead_reqsize(aead);
+	len = ALIGN(len, __alignof__(struct scatterlist));
+	len += sizeof(struct scatterlist) * sg_size;
+
+	return len;
+}
+
+/*
+ * Allocate the aead_request + scatterlist area for one frame.  Small
+ * requests (at most MACSEC_NFRAGS_CACHE fragments) are recycled through
+ * a per-device cache to avoid kmalloc/kfree in the fast path.
+ */
+static void *macsec_alloc_req_ctx(struct macsec_skb_cb *macsec_skb,
+				  struct crypto_aead *aead,
+				  int nfrags)
+{
+	void *ctx_data;
+	unsigned int len;
+	struct macsec_dev_ctx *ctx = macsec_skb->ctx;
+
+#if CONFIG_INET_MACSEC_NR_REQ_CACHE > 0
+	if (nfrags <= MACSEC_NFRAGS_CACHE) {
+		macsec_skb->flags |= 0x01;	/* cacheable on free */
+		if (atomic_read(&ctx->req_cache_cnt)) {
+			ctx_data = ctx->req_cache[ctx->req_cache_head];
+			ctx->req_cache_head = (ctx->req_cache_head + 1) %
+					      MACSEC_REQ_CACHE_MAX;
+			atomic_dec(&ctx->req_cache_cnt);
+			create_opt_cnt++;
+			return ctx_data;
+		}
+		create_force_cnt++;
+		len = ctx->req_cache_size +
+		      sizeof(struct scatterlist) * MACSEC_NFRAGS_CACHE;
+		ctx_data = kmalloc(len, GFP_ATOMIC);
+	} else {
+		create_cnt++;
+		macsec_skb->flags &= ~0x01;
+		len = ctx->req_cache_size +
+		      sizeof(struct scatterlist) * nfrags;
+		ctx_data = kmalloc(len, GFP_ATOMIC);
+	}
+#else
+	len = ctx->req_cache_size + sizeof(struct scatterlist) * nfrags;
+	ctx_data = kmalloc(len, GFP_ATOMIC);
+#endif
+	return ctx_data;
+}
+
+static u32 glb_free_req_ctx_out;
+static u32 glb_free_req_ctx_in;
+
+static void macsec_free_req_ctx(struct macsec_skb_cb *macsec_skb)
+{
+#if CONFIG_INET_MACSEC_NR_REQ_CACHE > 0
+	struct macsec_dev_ctx *ctx = macsec_skb->ctx;
+
+	if (macsec_skb->flags & 0x01) {
+		if (atomic_read(&ctx->req_cache_cnt) < MACSEC_REQ_CACHE_MAX) {
+			ctx->req_cache[ctx->req_cache_tail] = macsec_skb->req_ctx;
+			ctx->req_cache_tail = (ctx->req_cache_tail + 1) %
+					      MACSEC_REQ_CACHE_MAX;
+			atomic_inc(&ctx->req_cache_cnt);
+			delete_opt_cnt++;
+			return;
+		}
+	}
+#endif
+	delete_cnt++;
+	kfree(macsec_skb->req_ctx);
+}
+
+static inline struct scatterlist *macsec_req_sg(struct crypto_aead *aead,
+						struct aead_request *req)
+{
+	return (struct scatterlist *) ALIGN((unsigned long) (req + 1) +
+			crypto_aead_reqsize(aead),
+			__alignof__(struct scatterlist));
+}
+
+__be16 macsec_type_trans(struct sk_buff *skb)
+{
+	struct macsec_ethhdr *eth;
+
+	/* the MAC header has already been pulled by eth_type_trans() */
+	eth = (struct macsec_ethhdr *)(skb->data - ETH_HLEN);
+	return eth->hdr.macsec_type;
+}
+
+int macsec_init_aead(struct macsec_dev_ctx *mdata)
+{
+	struct crypto_aead *aead;
+	int err;
+	char *alg_name = "macsec(gcm)";
+	/*
+	 * XXX: fixed test key; a real implementation would take the SAK
+	 * from the key-agreement entity.
+	 */
+	char key[32] = { 0x88, 0xc5, 0x12, 0x00,
+			 0x00, 0x00, 0x00, 0x00,
+			 0x07, 0x52, 0x05, 0x20, 0x9f, 0xe8, 0x6b, 0xf8,
+			 0x8e, 0x7f, 0xa3, 0xaa, 0x77, 0x89, 0x58, 0xd2,
+			 0x50, 0x61, 0x75, 0x72, 0x81, 0x39, 0x7f, 0xcc};
+	int key_len = 32;
+
+	aead = crypto_alloc_aead(alg_name, 0, 0);
+	if (IS_ERR(aead)) {
+		printk(KERN_ERR "Failed to create aead transform for macsec(gcm)\n");
+		return PTR_ERR(aead);
+	}
+
+	mdata->aead = aead;
+
+	err = crypto_aead_setkey(aead, key, key_len);
+	if (err) {
+		printk(KERN_ERR "Failed to set key for macsec(gcm)\n");
+		crypto_free_aead(aead);
+		mdata->aead = NULL;
+		return err;
+	}
+
+	err = crypto_aead_setauthsize(aead, 24);
+	if (err) {
+		printk(KERN_WARNING "Failed to set authsize for macsec(gcm)\n");
+		err = 0;	/* non-fatal: the transform's default is used */
+	}
+	return err;
+}
+
+/* crypto completion callback for the transmit path */
+static void macsec_output_done_hw(struct crypto_async_request *base, int err)
+{
+	int ret;
+	struct sk_buff *skb = base->data;
+	struct net_device *dev = skb->dev;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (err < 0) {
+		macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+		kfree_skb(skb);		/* encryption failed, drop the frame */
+		return;
+	}
+	glb_free_req_ctx_out++;
+	macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+	/* resubmit the now-encrypted frame to the driver */
+	ret = ops->ndo_start_xmit(skb, dev);
+}
+
+int macsec_output_hw(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+	struct macsec_dev_ctx *data;
+	struct crypto_aead *aead;
+	struct aead_request *req;
+	struct scatterlist *sg;
+	struct scatterlist *dsg;
+	struct sk_buff *trailer;
+	void *macsec_req;
+	int clen;
+	int alen;
+	int nfrags;
+
+	data = netdev_macsec_priv(dev);
+	aead = data->aead;
+
+	/* fixed 16-byte ICV, independent of the transform's authsize */
+	alen = 16;
+
+	/* make room for the ICV trailer */
+	if ((err = skb_cow_data(skb, alen, &trailer)) < 0)
+		goto error;
+	nfrags = err;
+
+	MACSEC_SKB_CB(skb)->ctx = data;
+	macsec_req = macsec_alloc_req_ctx(MACSEC_SKB_CB(skb), aead, nfrags * 2);
+	if (!macsec_req) {
+		err = -ENOMEM;
+		goto error;
+	}
+	req = (struct aead_request *) macsec_req;
+
+	aead_request_set_tfm(req, aead);
+	sg = macsec_req_sg(aead, req);
+	dsg = sg + nfrags;
+
+	/* source scatterlist: the frame as-is */
+	sg_init_table(sg, nfrags);
+	skb_to_sgvec(skb, sg, 0, skb->len);
+
+	/*
+	 * Destination scatterlist: frame plus room for the 8-byte SecTAG
+	 * and the ICV.  The "macsec(gcm)" transform is expected to write
+	 * the SecTAG and ICV itself; this code only reserves the space.
+	 */
+	clen = skb->len;
+	pskb_put(skb, trailer, alen);
+	skb_push(skb, 8);
+	sg_init_table(dsg, nfrags);
+	skb_to_sgvec(skb, dsg, 0, skb->len);
+
+	MACSEC_SKB_CB(skb)->req_ctx = macsec_req;
+
+	aead_request_set_callback(req, 0, macsec_output_done_hw, skb);
+	aead_request_set_crypt(req, sg, dsg, clen, NULL);
+	macsec_output++;
+	err = crypto_aead_encrypt(req);
+
+	if (err == -EINPROGRESS)
+		goto error;
+
+	if (err == -EAGAIN || err == -EBUSY) {
+		macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+		err = NET_XMIT_DROP;
+	}
+
+error:
+	return err;
+}
+
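+/*
+ * Receive-side frame layout assumed by the code below (editorial sketch,
+ * matching the 22-byte header pull and the 16-byte ICV trim):
+ *
+ *  +----+----+--------+-----+----+----+-----------+----------------+-----+
+ *  | DA | SA | 0x88E5 | TCI | SL | PN | EtherType | encrypted data | ICV |
+ *  +----+----+--------+-----+----+----+-----------+----------------+-----+
+ *    6    6      2       1    1    4       2             n           16
+ *
+ * macsec_done_input_hw() restores skb->protocol from the inner EtherType,
+ * trims the ICV and pulls the 22 header bytes.
+ */
+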
+/* crypto completion callback for the receive path */
+void macsec_done_input_hw(struct crypto_async_request *base, int err)
+{
+	struct sk_buff *skb = base->data;
+	int hlen = 22;	/* ETH header + MACsec SecTAG (TCI + SL + PN + EtherType) */
+	int ret;
+	struct macsec_ethhdr *eth;
+
+	if (err < 0) {
+		/* ICV verification or decryption failed: drop the frame */
+		macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+		kfree_skb(skb);
+		return;
+	}
+
+	skb_reset_mac_header(skb);
+	eth = (struct macsec_ethhdr *)skb_mac_header(skb);
+	skb->protocol = eth->h_proto;
+
+	pskb_trim(skb, skb->len - 16 /* ICV */);
+	__skb_pull(skb, hlen);
+	skb_reset_network_header(skb);
+	skb->transport_header = skb->network_header;
+
+	glb_free_req_ctx_in++;
+	macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+	ret = macsec_netif_receive_skb(skb, skb->protocol);
+}
+
+int macsec_input_hw(struct sk_buff *skb)
+{
+	struct macsec_dev_ctx *data;
+	struct crypto_aead *aead;
+	struct aead_request *req;
+	struct scatterlist *sg;
+	struct scatterlist *dsg;
+	struct sk_buff *trailer;
+	void *macsec_req;
+	int clen;
+	int nfrags;
+	int eth_len = ETH_HLEN;
+	int err = -EINVAL;
+	int src_len;
+
+	data = netdev_macsec_priv(skb->dev);
+	aead = data->aead;
+
+	if (!aead)
+		goto error;
+
+	if (!pskb_may_pull(skb, eth_len))
+		goto error;
+
+	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+		goto error;
+	nfrags = err;
+
+	err = -ENOMEM;
+
+	MACSEC_SKB_CB(skb)->ctx = data;
+	macsec_req = macsec_alloc_req_ctx(MACSEC_SKB_CB(skb), aead, nfrags * 2);
+	if (!macsec_req)
+		goto error;
+	req = (struct aead_request *) macsec_req;
+	aead_request_set_tfm(req, aead);
+	sg = macsec_req_sg(aead, req);
+	dsg = sg + nfrags;
+
+	/* source: MAC header + SecTAG + ciphertext + ICV */
+	sg_init_table(sg, nfrags);
+	skb_push(skb, eth_len);		/* re-expose the MAC header */
+	clen = skb->len;
+	skb_to_sgvec(skb, sg, 0, clen);
+	src_len = clen;
+
+	/* destination: same frame minus the 16-byte ICV and 8-byte SecTAG */
+	sg_init_table(dsg, nfrags);
+	clen -= 16;
+	clen -= 8;
+	skb_to_sgvec(skb, dsg, 0, clen);
+	MACSEC_SKB_CB(skb)->req_ctx = macsec_req;
+
+	aead_request_set_callback(req, 0, macsec_done_input_hw, skb);
+	aead_request_set_crypt(req, sg, dsg, src_len, NULL);
+
+	err = crypto_aead_decrypt(req);
+
+	if (err == -EINPROGRESS) {
+		macsec_input++;
+		goto error;
+	}
+	if (err == -EBUSY || err == -EAGAIN) {
+		macsec_free_req_ctx(MACSEC_SKB_CB(skb));
+		err = NET_XMIT_DROP;
+	}
+
+error:
+	return err;
+}
+
+static ssize_t macsec_console_driver_write(struct file *file,
+					   const char __user *buf,
+					   size_t count, loff_t *ppos)
+{
+	char cmd;
+
+	if (!count)
+		return 0;
+	/* buf is a user pointer and must not be dereferenced directly */
+	if (get_user(cmd, buf))
+		return -EFAULT;
+
+	if (cmd == '1') {
+		printk(KERN_INFO "macsec stats: "
+		       "create_cnt = %d, delete_cnt = %u, "
+		       "create_opt_cnt = %u, delete_opt_cnt = %d, "
+		       "create_force_cnt = %d, "
+		       "glb_free_req_ctx_out = %u, glb_free_req_ctx_in = %u, "
+		       "macsec_input = %d, macsec_output = %d\n",
+		       create_cnt, delete_cnt, create_opt_cnt,
+		       delete_opt_cnt, create_force_cnt,
+		       glb_free_req_ctx_out, glb_free_req_ctx_in,
+		       macsec_input, macsec_output);
+	}
+	return count;
+}
+
+static int macsec_console_driver_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static int macsec_console_driver_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static const struct file_operations macsec_console_driver_fops = {
+	.owner   = THIS_MODULE,
+	.open    = macsec_console_driver_open,
+	.release = macsec_console_driver_release,
+	.write   = macsec_console_driver_write,
+};
+
+#define MACSEC_CONSOLE_DRIVER_NAME "macsec"
+
+int macsec_console_module_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	entry = create_proc_entry(MACSEC_CONSOLE_DRIVER_NAME, 0, NULL);
+	if (entry == NULL) {
+		printk(KERN_ERR "macsec: proc entry creation failed\n");
+		return -1;
+	}
+
+	entry->proc_fops = &macsec_console_driver_fops;
+	printk(KERN_INFO "macsec: proc interface initialized\n");
+	return 0;
+}
+
+void macsec_console_module_exit(void)
+{
+	remove_proc_entry(MACSEC_CONSOLE_DRIVER_NAME, NULL);
+}
+
+void macsec_destroy(struct net_device *dev)
+{
+	struct macsec_dev_ctx *macdata = dev->macsec_priv;
+
+	if (!macdata)
+		return;
+
+	crypto_free_aead(macdata->aead);
+	/* drain the request-context cache */
+	while (atomic_read(&macdata->req_cache_cnt) > 0) {
+		kfree(macdata->req_cache[macdata->req_cache_head]);
+		macdata->req_cache_head = (macdata->req_cache_head + 1) %
+					  MACSEC_REQ_CACHE_MAX;
+		atomic_dec(&macdata->req_cache_cnt);
+	}
+	dev->macsec_priv = NULL;
+	dev->macsec_output_hw = NULL;
+	dev->macsec_input_hw = NULL;
+
+	kfree(macdata);
+	printk(KERN_INFO "macsec: session destroyed\n");
+}
+
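+/*
+ * Example userspace usage (editorial sketch; assumes SIOCSETMACSEC and
+ * SIOCUNSETMACSEC are exported through the uapi headers, which are not
+ * part of this diff, and that the caller has CAP_NET_ADMIN):
+ *
+ *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
+ *	struct ifreq ifr;
+ *
+ *	memset(&ifr, 0, sizeof(ifr));
+ *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
+ *	ioctl(fd, SIOCSETMACSEC, &ifr);		-- establish a session
+ *	...
+ *	ioctl(fd, SIOCUNSETMACSEC, &ifr);	-- tear it down
+ *
+ * Statistics can then be dumped to the kernel log with:
+ *	echo 1 > /proc/macsec
+ */
+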
+int macsec_init_state(struct net_device *dev)
+{
+	struct macsec_dev_ctx *macdata;
+	int err;
+
+	macdata = kzalloc(sizeof(*macdata), GFP_KERNEL);
+	if (macdata == NULL)
+		return -ENOMEM;
+
+	err = macsec_init_aead(macdata);
+	if (err) {
+		kfree(macdata);	/* don't leave a half-initialized session */
+		return err;
+	}
+
+#if CONFIG_INET_MACSEC_NR_REQ_CACHE > 0
+	atomic_set(&macdata->req_cache_cnt, 0);
+	macdata->req_cache_head = 0;
+	macdata->req_cache_tail = 0;
+#endif
+	macdata->req_cache_size = macsec_req_ctx_size(macdata->aead, 0);
+
+	/* publish the session only once it is fully set up */
+	dev->macsec_priv = macdata;
+	dev->macsec_output_hw = macsec_output_hw;
+	dev->macsec_input_hw = macsec_input_hw;
+
+	printk(KERN_INFO "macsec: session established\n");
+	return 0;
+}
+
+static int __init macsec_init(void)
+{
+	int ret;
+
+	ret = macsec_console_module_init();
+	if (ret) {
+		printk(KERN_ERR "macsec: proc driver could not initialize\n");
+		return ret;
+	}
+	printk(KERN_INFO "macsec: registered MACsec interface\n");
+	return 0;
+}
+
+static void __exit macsec_fini(void)
+{
+	macsec_console_module_exit();
+	printk(KERN_INFO "macsec: unregistered MACsec interface\n");
+}
+
+module_init(macsec_init);
+module_exit(macsec_fini);
+MODULE_LICENSE("GPL");
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ec85681a7dd..f5b46f6a970 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,9 +73,25 @@
 static struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 
+/* release the skb reference held by a partial-page slot */
+static void sock_spd_buf_release(struct splice_pipe_desc *spd, unsigned int i)
+{
+	struct sk_buff *skb = (struct sk_buff *)spd->partial[i].private;
+
+	kfree_skb(skb);
+}
+
 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
+	struct sk_buff *skb = (struct sk_buff *) buf->private;
+
+	kfree_skb(skb);
+}
+
+/* old page-based release, kept for reference (currently unused) */
+static void sock_pipe_buf_release_1(struct pipe_inode_info *pipe,
+				    struct pipe_buffer *buf)
+{
 	put_page(buf->page);
 }
 
@@ -1374,7 +1390,7 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 {
 	put_page(spd->pages[i]);
 }
-
+#if 0
 static inline struct page *linear_to_page(struct page *page, unsigned int *len,
 					  unsigned int *offset,
 					  struct sk_buff *skb, struct sock *sk)
@@ -1488,7 +1504,6 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 
 	return 0;
 }
-
 /*
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
@@ -1521,7 +1536,6 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 
 	return 0;
 }
-
 /*
  * Map data from the skb to a pipe. Should handle both the linear part,
  * the fragments, and the frag list. It does NOT handle frag lists within
@@ -1539,7 +1553,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		.partial = partial,
 		.flags = flags,
 		.ops = &sock_pipe_buf_ops,
-		.spd_release = sock_spd_release,
+		.spd_release = sock_spd_buf_release,
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
@@ -1584,7 +1598,212 @@ done:
 	return 0;
 }
+#else
+/*
+ * Fill page/offset/length into spd, if it can hold more pages.
+ */
+static inline int spd_fill_page_1(struct splice_pipe_desc *spd,
+				  struct page *page,
+				  unsigned int len, unsigned int offset,
+				  struct sk_buff *skb)
+{
+	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+		return 1;
+
+	/* no get_page(): see the reference-counting note below */
+	spd->pages[spd->nr_pages] = page;
+	spd->partial[spd->nr_pages].len = len;
+	spd->partial[spd->nr_pages].offset = offset;
+	spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
+	spd->nr_pages++;
+	return 0;
+}
+
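+/*
+ * Reference counting in this splice path: the old implementation took a
+ * page reference (get_page()) per pipe buffer and dropped it in
+ * sock_spd_release().  The version below instead takes one skb reference
+ * (skb_get()) per filled slot and stores the skb in partial[i].private;
+ * sock_spd_buf_release() and sock_pipe_buf_release() drop those skb
+ * references as the pipe buffers are consumed, which keeps the skb's
+ * linear data and fragment pages alive while the pipe still maps them.
+ */
+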
+/*
+ * Map linear and fragment data from the skb to spd. Returns number of
+ * pages mapped.
+ */
+static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+			     unsigned int *total_len,
+			     struct splice_pipe_desc *spd,
+			     struct sock *sk)
+{
+	unsigned int nr_pages = spd->nr_pages;
+	unsigned int poff, plen, len, toff, tlen;
+	int headlen, seg;
+
+	toff = *offset;
+	tlen = *total_len;
+	if (!tlen)
+		goto err;
+
+	/*
+	 * if the offset is greater than the linear part, go directly to
+	 * the fragments.
+	 */
+	headlen = skb_headlen(skb);
+	if (toff >= headlen) {
+		toff -= headlen;
+		goto map_frag;
+	}
+
+	/*
+	 * first map the linear region into the pages/partial map, skipping
+	 * any potential initial offset.
+	 */
+	len = 0;
+	while (len < headlen) {
+		void *p = skb->data + len;
+
+		poff = (unsigned long) p & (PAGE_SIZE - 1);
+		plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
+		len += plen;
+
+		if (toff) {
+			if (plen <= toff) {
+				toff -= plen;
+				continue;
+			}
+			plen -= toff;
+			poff += toff;
+			toff = 0;
+		}
+
+		plen = min(plen, tlen);
+		if (!plen)
+			break;
+
+		/*
+		 * just jump directly to update and return, no point
+		 * in going over fragments when the output is full.
+		 */
+		if (spd_fill_page_1(spd, virt_to_page(p), plen, poff, skb))
+			goto done;
+
+		tlen -= plen;
+	}
+
+	/*
+	 * then map the fragments
+	 */
+map_frag:
+	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
+		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
+
+		plen = f->size;
+		poff = f->page_offset;
+
+		if (toff) {
+			if (plen <= toff) {
+				toff -= plen;
+				continue;
+			}
+			plen -= toff;
+			poff += toff;
+			toff = 0;
+		}
+
+		plen = min(plen, tlen);
+		if (!plen)
+			break;
+
+		if (spd_fill_page_1(spd, f->page, plen, poff, skb))
+			break;
+
+		tlen -= plen;
+	}
+
+done:
+	if (spd->nr_pages - nr_pages) {
+		*offset = 0;
+		*total_len = tlen;
+		return 0;
+	}
+err:
+	return 1;
+}
+
+/*
+ * Map data from the skb to a pipe. Should handle both the linear part,
+ * the fragments, and the frag list. It does NOT handle frag lists within
+ * the frag list, if such a thing exists. We'd probably need to recurse to
+ * handle that cleanly.
+ */
+int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+		    struct pipe_inode_info *pipe, unsigned int tlen,
+		    unsigned int flags)
+{
+	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_BUFFERS];
+	struct splice_pipe_desc spd = {
+		.pages = pages,
+		.partial = partial,
+		.flags = flags,
+		.ops = &sock_pipe_buf_ops,
+		.spd_release = sock_spd_buf_release,
+	};
+	struct sock *sk = __skb->sk;
+	struct sk_buff *skb;
+
+	/*
+	 * I'd love to avoid the clone here, but tcp_read_sock()
+	 * ignores reference counts and unconditionally kills the sk_buff
+	 * on return from the actor.
+	 */
+	skb = skb_clone(__skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	/*
+	 * __skb_splice_bits() only fails if the output has no room left,
+	 * so no point in going over the frag_list for the error case.
+	 */
+	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+		goto done;
+	else if (!tlen)
+		goto done;
+
+	/*
+	 * now see if we have a frag_list to map
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list && tlen; list = list->next) {
+			if (__skb_splice_bits(list, &offset, &tlen, &spd, sk))
+				break;
+		}
+	}
+
+done:
+	/*
+	 * drop our reference to the clone, the pipe consumption will
+	 * drop the rest.
+	 */
+	kfree_skb(skb);
+
+	if (spd.nr_pages) {
+		int ret;
+
+		/*
+		 * Drop the socket lock, otherwise we have reverse
+		 * locking dependencies between sk_lock and i_mutex
+		 * here as compared to sendfile(). We enter here
+		 * with the socket lock held, and splice_to_pipe() will
+		 * grab the pipe inode lock. For sendfile() emulation,
+		 * we call into ->sendpage() with the i_mutex lock held
+		 * and networking will grab the socket lock.
+		 */
+		release_sock(sk);
+		ret = splice_to_pipe(pipe, &spd);
+		lock_sock(sk);
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
 /**
  *	skb_store_bits - store bits from kernel buffer to skb
  *	@skb: destination buffer