diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 19:11:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-24 19:11:49 -0700 |
commit | 832fe9c222c7d431c2bff5765a0ac61bcb3df8c8 (patch) | |
tree | ce4a482723db61955c47a028b14e3227d290d3be | |
parent | ed9559d38a87a44e3bda87d73a50aab92471d7dc (diff) | |
parent | e34f87256794b87e7f4a8f1812538be7b7b5214c (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus:
  virtio: Add transport feature handling stub for virtio_ring.
  virtio: Rename set_features to finalize_features
  virtio: Formally reserve bits 28-31 to be 'transport' features.
  s390: use virtio_console for KVM on s390
  virtio: console as a config option
  virtio_console: use virtqueue notification for hvc_console
  hvc_console: rework setup to replace irq functions with callbacks
  virtio_blk: check for hardsector size from host
  virtio: Use bus_type probe and remove methods
  virtio: don't always force a notification when ring is full
  virtio: clarify that ABI is usable by any implementations
  virtio: Recycle unused recv buffer pages for large skbs in net driver
  virtio net: Allow receiving SG packets
  virtio net: Add ethtool ops for SG/GSO
  virtio: fix virtio_net xmit of freed skb bug
28 files changed, 385 insertions, 124 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb530b4128b..2ed88122be9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING + select VIRTIO_CONSOLE help Select this option if you want to run the kernel under s390 linux endmenu diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b358e18273b..62122bad1e3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include <asm/sections.h> #include <asm/ebcdic.h> #include <asm/compat.h> +#include <asm/kvm_virtio.h> long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -766,7 +767,8 @@ setup_arch(char **cmdline_p) printk("We are running under VM (64 bit mode)\n"); else if (MACHINE_IS_KVM) { printk("We are running under KVM (64 bit mode)\n"); - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("hvc", 0, NULL); + s390_virtio_console_init(); } else printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd7ea203f94..42251095134 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; + u32 blk_size; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_hardsect_size(vblk->disk->queue, blk_size); + add_disk(vblk->disk); return 0; @@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = 
{ VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, }; static struct virtio_driver virtio_blk = { diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 67b07576f8b..6c070dc5f2d 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -578,11 +578,14 @@ config HVC_DRIVER It will automatically be selected if one of the back-end console drivers is selected. +config HVC_IRQ + bool config HVC_CONSOLE bool "pSeries Hypervisor Virtual Console support" depends on PPC_PSERIES select HVC_DRIVER + select HVC_IRQ help pSeries machines when partitioned support a hypervisor virtual console. This driver allows each pSeries partition to have a console @@ -593,6 +596,7 @@ config HVC_ISERIES depends on PPC_ISERIES default y select HVC_DRIVER + select HVC_IRQ help iSeries machines support a hypervisor virtual console. @@ -614,13 +618,18 @@ config HVC_XEN bool "Xen Hypervisor Console support" depends on XEN select HVC_DRIVER + select HVC_IRQ default y help Xen virtual console device driver config VIRTIO_CONSOLE - bool + tristate "Virtio console" + depends on VIRTIO select HVC_DRIVER + help + Virtio console for use with lguest and other hypervisors. 
+ config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4b6e736cfa0..eb02c350680 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_IRQ) += hvc_irq.o obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o obj-$(CONFIG_RAW_DRIVER) += raw.o diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 2f9759d625c..02aac104842 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -27,7 +27,6 @@ #include <linux/init.h> #include <linux/kbd_kern.h> #include <linux/kernel.h> -#include <linux/kref.h> #include <linux/kthread.h> #include <linux/list.h> #include <linux/module.h> @@ -75,23 +74,6 @@ static int hvc_init(void); static int sysrq_pressed; #endif -struct hvc_struct { - spinlock_t lock; - int index; - struct tty_struct *tty; - unsigned int count; - int do_wakeup; - char *outbuf; - int outbuf_size; - int n_outbuf; - uint32_t vtermno; - struct hv_ops *ops; - int irq_requested; - int irq; - struct list_head next; - struct kref kref; /* ref count & hvc_struct lifetime */ -}; - /* dynamic list of hvc_struct instances */ static LIST_HEAD(hvc_structs); @@ -298,27 +280,15 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) return 0; } +EXPORT_SYMBOL_GPL(hvc_instantiate); /* Wake the sleeping khvcd */ -static void hvc_kick(void) +void hvc_kick(void) { hvc_kicked = 1; wake_up_process(hvc_task); } - -static int hvc_poll(struct hvc_struct *hp); - -/* - * NOTE: This API isn't used if the console adapter doesn't support interrupts. - * In this case the console is poll driven. 
- */ -static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) -{ - /* if hvc_poll request a repoll, then kick the hvcd thread */ - if (hvc_poll(dev_instance)) - hvc_kick(); - return IRQ_HANDLED; -} +EXPORT_SYMBOL_GPL(hvc_kick); static void hvc_unthrottle(struct tty_struct *tty) { @@ -333,7 +303,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; unsigned long flags; - int irq = 0; int rc = 0; /* Auto increments kref reference if found. */ @@ -352,18 +321,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ hp->tty = tty; - /* Save for request_irq outside of spin_lock. */ - irq = hp->irq; - if (irq) - hp->irq_requested = 1; + + if (hp->ops->notifier_add) + rc = hp->ops->notifier_add(hp, hp->data); spin_unlock_irqrestore(&hp->lock, flags); - /* check error, fallback to non-irq */ - if (irq) - rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp); + /* - * If the request_irq() fails and we return an error. The tty layer + * If the notifier fails we return an error. The tty layer * will call hvc_close() after a failed open but we don't want to clean * up there so we'll clean up here and clear out the previously set * tty fields and return the kref reference. 
@@ -371,7 +337,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) if (rc) { spin_lock_irqsave(&hp->lock, flags); hp->tty = NULL; - hp->irq_requested = 0; spin_unlock_irqrestore(&hp->lock, flags); tty->driver_data = NULL; kref_put(&hp->kref, destroy_hvc_struct); @@ -386,7 +351,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) static void hvc_close(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; - int irq = 0; unsigned long flags; if (tty_hung_up_p(filp)) @@ -404,9 +368,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) spin_lock_irqsave(&hp->lock, flags); if (--hp->count == 0) { - if (hp->irq_requested) - irq = hp->irq; - hp->irq_requested = 0; + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); /* We are done with the tty pointer now. */ hp->tty = NULL; @@ -418,10 +381,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) * waking periodically to check chars_in_buffer(). */ tty_wait_until_sent(tty, HVC_CLOSE_WAIT); - - if (irq) - free_irq(irq, hp); - } else { if (hp->count < 0) printk(KERN_ERR "hvc_close %X: oops, count is %d\n", @@ -436,7 +395,6 @@ static void hvc_hangup(struct tty_struct *tty) { struct hvc_struct *hp = tty->driver_data; unsigned long flags; - int irq = 0; int temp_open_count; if (!hp) @@ -458,13 +416,12 @@ static void hvc_hangup(struct tty_struct *tty) hp->count = 0; hp->n_outbuf = 0; hp->tty = NULL; - if (hp->irq_requested) - /* Saved for use outside of spin_lock. 
*/ - irq = hp->irq; - hp->irq_requested = 0; + + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); + spin_unlock_irqrestore(&hp->lock, flags); - if (irq) - free_irq(irq, hp); + while(temp_open_count) { --temp_open_count; kref_put(&hp->kref, destroy_hvc_struct); @@ -575,7 +532,7 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -static int hvc_poll(struct hvc_struct *hp) +int hvc_poll(struct hvc_struct *hp) { struct tty_struct *tty; int i, n, poll_mask = 0; @@ -602,10 +559,10 @@ static int hvc_poll(struct hvc_struct *hp) if (test_bit(TTY_THROTTLED, &tty->flags)) goto throttled; - /* If we aren't interrupt driven and aren't throttled, we always + /* If we aren't notifier driven and aren't throttled, we always * request a reschedule */ - if (hp->irq == 0) + if (!hp->irq_requested) poll_mask |= HVC_POLL_READ; /* Read data if any */ @@ -674,6 +631,7 @@ static int hvc_poll(struct hvc_struct *hp) return poll_mask; } +EXPORT_SYMBOL_GPL(hvc_poll); /* * This kthread is either polling or interrupt driven. 
This is determined by @@ -733,7 +691,7 @@ static const struct tty_operations hvc_ops = { .chars_in_buffer = hvc_chars_in_buffer, }; -struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, +struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; @@ -754,7 +712,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, memset(hp, 0x00, sizeof(*hp)); hp->vtermno = vtermno; - hp->irq = irq; + hp->data = data; hp->ops = ops; hp->outbuf_size = outbuf_size; hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))]; @@ -784,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, return hp; } +EXPORT_SYMBOL_GPL(hvc_alloc); int __devexit hvc_remove(struct hvc_struct *hp) { diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 42ffb17e15d..d9ce1091562 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -26,6 +26,7 @@ #ifndef HVC_CONSOLE_H #define HVC_CONSOLE_H +#include <linux/kref.h> /* * This is the max number of console adapters that can/will be found as @@ -42,24 +43,50 @@ */ #define HVC_ALLOC_TTY_ADAPTERS 8 +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char *outbuf; + int outbuf_size; + int n_outbuf; + uint32_t vtermno; + struct hv_ops *ops; + int irq_requested; + int data; + struct list_head next; + struct kref kref; /* ref count & hvc_struct lifetime */ +}; /* implemented by a low level driver */ struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); -}; -struct hvc_struct; + /* Callbacks for notification. 
Called in open and close */ + int (*notifier_add)(struct hvc_struct *hp, int irq); + void (*notifier_del)(struct hvc_struct *hp, int irq); +}; /* Register a vterm and a slot index for use as a console (console_init) */ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ -extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, +extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size); -/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ +/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); +/* data available */ +int hvc_poll(struct hvc_struct *hp); +void hvc_kick(void); + +/* default notifier for irq based notification */ +extern int notifier_add_irq(struct hvc_struct *hp, int data); +extern void notifier_del_irq(struct hvc_struct *hp, int data); + #if defined(CONFIG_XMON) && defined(CONFIG_SMP) #include <asm/xmon.h> diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c new file mode 100644 index 00000000000..73a59cdb894 --- /dev/null +++ b/drivers/char/hvc_irq.c @@ -0,0 +1,44 @@ +/* + * Copyright IBM Corp. 
2001,2008 + * + * This file contains the IRQ specific code for hvc_console + * + */ + +#include <linux/interrupt.h> + +#include "hvc_console.h" + +static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) +{ + /* if hvc_poll request a repoll, then kick the hvcd thread */ + if (hvc_poll(dev_instance)) + hvc_kick(); + return IRQ_HANDLED; +} + +/* + * For IRQ based systems these callbacks can be used + */ +int notifier_add_irq(struct hvc_struct *hp, int irq) +{ + int rc; + + if (!irq) { + hp->irq_requested = 0; + return 0; + } + rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, + "hvc_console", hp); + if (!rc) + hp->irq_requested = 1; + return rc; +} + +void notifier_del_irq(struct hvc_struct *hp, int irq) +{ + if (!irq) + return; + free_irq(irq, hp); + hp->irq_requested = 0; +} diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index a08f8f981c1..b71c610fe5a 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -200,6 +200,8 @@ done: static struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 79711aa4b41..93f3840c168 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) static struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index db2ae421627..6b70aa66a58 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len) static struct hv_ops hvc_ops = { .get_chars = 
read_console, .put_chars = write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __init xen_init(void) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index dc17fe3a88b..d0f4eb6fdb7 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -46,6 +46,9 @@ static char *in, *inbuf; /* The operations for our console. */ static struct hv_ops virtio_cons; +/* The hvc device */ +static struct hvc_struct *hvc; + /*D:310 The put_chars() callback is pretty straightforward. * * We turn the characters into a scatter-gather list, add it to the output @@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) return hvc_instantiate(0, 0, &virtio_cons); } +/* + * we support only one console, the hvc struct is a global var + * There is no need to do anything + */ +static int notifier_add_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 1; + return 0; +} + +static void notifier_del_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 0; +} + +static void hvc_handle_input(struct virtqueue *vq) +{ + if (hvc_poll(hvc)) + hvc_kick(); +} + /*D:370 Once we're further in boot, we get probed like any other virtio device. * At this stage we set up the output virtqueue. * @@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) static int __devinit virtcons_probe(struct virtio_device *dev) { int err; - struct hvc_struct *hvc; vdev = dev; @@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. 
*/ - in_vq = vdev->config->find_vq(vdev, 0, NULL); + in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; @@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Start using the new console output. */ virtio_cons.get_chars = get_chars; virtio_cons.put_chars = put_chars; + virtio_cons.notifier_add = notifier_add_vio; + virtio_cons.notifier_del = notifier_del_vio; /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the interrupt number; we - * currently leave this as zero: it would be better not to use the - * hvc mechanism and fix this (FIXME!). + * we use zero. The second argument is the parameter for the + * notification mechanism (like irq number). We currently leave this + * as zero, virtqueues have implicit notifications. * * The third argument is a "struct hv_ops" containing the put_chars() - * and get_chars() pointers. The final argument is the output buffer - * size: we can do any size, so we put PAGE_SIZE here. */ + * get_chars(), notifier_add() and notifier_del() pointers. + * The final argument is the output buffer size: we can do any size, + * so we put PAGE_SIZE here. */ hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); if (IS_ERR(hvc)) { err = PTR_ERR(hvc); diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1a8de57289e..37344aaee22 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -98,16 +98,20 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -static void lg_set_features(struct virtio_device *vdev, u32 features) +static void lg_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; /* Second half of bitmap is features we accept. 
*/ u8 *out_features = lg_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -297,7 +301,7 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, - .set_features = lg_set_features, + .finalize_features = lg_finalize_features, .get = lg_get, .set = lg_set, .get_status = lg_get_status, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c28d7cb2035..0196a0df902 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -19,6 +19,7 @@ //#define DEBUG #include <linux/netdevice.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> #include <linux/module.h> #include <linux/virtio.h> #include <linux/virtio_net.h> @@ -54,9 +55,15 @@ struct virtnet_info struct tasklet_struct tasklet; bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ + bool big_packets; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; + + /* Chain pages by the private ptr. 
*/ + struct page *pages; }; static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -69,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } +static void give_a_page(struct virtnet_info *vi, struct page *page) +{ + page->private = (unsigned long)vi->pages; + vi->pages = page; +} + +static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page *p = vi->pages; + + if (p) + vi->pages = (struct page *)p->private; + else + p = alloc_page(gfp_mask); + return p; +} + static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; @@ -88,6 +112,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + int err; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -95,10 +120,23 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto drop; } len -= sizeof(struct virtio_net_hdr); - BUG_ON(len > MAX_PACKET_LEN); - skb_trim(skb, len); + if (len <= MAX_PACKET_LEN) { + unsigned int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + skb->data_len = 0; + skb_shinfo(skb)->nr_frags = 0; + } + + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); + dev->stats.rx_dropped++; + goto drop; + } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -160,7 +198,7 @@ static void try_fill_recv(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err; + int num, err, i; sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { @@ -170,6 +208,24 @@ static void try_fill_recv(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); vnet_hdr_to_sg(sg, skb); + + if 
(vi->big_packets) { + for (i = 0; i < MAX_SKB_FRAGS; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + f->page = get_a_page(vi, GFP_ATOMIC); + if (!f->page) + break; + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb->data_len += PAGE_SIZE; + skb->len += PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + } + } + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; skb_queue_head(&vi->recv, skb); @@ -335,16 +391,11 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb)) { - if (xmit_skb(vi, vi->last_xmit_skb) != 0) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - skb = NULL; - goto stop_queue; - } - vi->last_xmit_skb = NULL; - } + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) != 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; /* Put new one in send queue and do transmit */ if (likely(skb)) { @@ -370,6 +421,11 @@ stop_queue: netif_start_queue(dev); goto again; } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } goto done; } @@ -408,6 +464,22 @@ static int virtnet_close(struct net_device *dev) return 0; } +static int virtnet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct virtio_device *vdev = vi->vdev; + + if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops virtnet_ethtool_ops = { + .set_tx_csum = virtnet_set_tx_csum, + .set_sg = ethtool_op_set_sg, +}; + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -427,6 +499,7 @@ static int virtnet_probe(struct virtio_device *vdev) #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; #endif + SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? 
*/ @@ -462,11 +535,18 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; + vi->pages = NULL; /* If they give us a callback when all buffers are done, we don't need * the timer. */ vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + vi->big_packets = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -541,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev) vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); unregister_netdev(vi->dev); + + while (vi->pages) + __free_pages(get_a_page(vi, GFP_KERNEL), 0); + free_netdev(vi->dev); } @@ -553,7 +637,9 @@ static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, /* We |