From 9953ca6cb757fb317bb7cdd2fcbf9b88312e241b Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Tue, 27 May 2008 12:06:26 +0100 Subject: virtio: fix virtio_net xmit of freed skb bug On Mon, 2008-05-26 at 17:42 +1000, Rusty Russell wrote: > If we fail to transmit a packet, we assume the queue is full and put > the skb into last_xmit_skb. However, if more space frees up before we > xmit it, we loop, and the result can be transmitting the same skb twice. > > Fix is simple: set skb to NULL if we've used it in some way, and check > before sending. ... > diff -r 564237b31993 drivers/net/virtio_net.c > --- a/drivers/net/virtio_net.c Mon May 19 12:22:00 2008 +1000 > +++ b/drivers/net/virtio_net.c Mon May 19 12:24:58 2008 +1000 > @@ -287,21 +287,25 @@ again: > free_old_xmit_skbs(vi); > > /* If we has a buffer left over from last time, send it now. */ > - if (vi->last_xmit_skb) { > + if (unlikely(vi->last_xmit_skb)) { > if (xmit_skb(vi, vi->last_xmit_skb) != 0) { > /* Drop this skb: we only queue one. */ > vi->dev->stats.tx_dropped++; > kfree_skb(skb); > + skb = NULL; > goto stop_queue; > } > vi->last_xmit_skb = NULL; With this, may drop an skb and then later in the function discover that we could have sent it after all. Poor wee skb :) How about the incremental patch below? Cheers, Mark. Subject: [PATCH] virtio_net: Delay dropping tx skbs Currently we drop the skb in start_xmit() if we have a queued buffer and fail to transmit it. However, if we delay dropping it until we've stopped the queue and enabled the tx notification callback, then there is a chance space might become available for it. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c28d7cb2035..06d5c43bb20 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -335,16 +335,11 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (unlikely(vi->last_xmit_skb)) { - if (xmit_skb(vi, vi->last_xmit_skb) != 0) { - /* Drop this skb: we only queue one. */ - vi->dev->stats.tx_dropped++; - kfree_skb(skb); - skb = NULL; - goto stop_queue; - } - vi->last_xmit_skb = NULL; - } + if (unlikely(vi->last_xmit_skb) && + xmit_skb(vi, vi->last_xmit_skb) != 0) + goto stop_queue; + + vi->last_xmit_skb = NULL; /* Put new one in send queue and do transmit */ if (likely(skb)) { @@ -370,6 +365,11 @@ stop_queue: netif_start_queue(dev); goto again; } + if (skb) { + /* Drop this skb: we only queue one. */ + vi->dev->stats.tx_dropped++; + kfree_skb(skb); + } goto done; } -- cgit v1.2.3-18-g5258 From a9ea3fc6f2654a7407864fec983d1671d775b5ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2008 11:21:42 +0800 Subject: virtio net: Add ethtool ops for SG/GSO This patch adds some basic ethtool operations to virtio_net so I could test SG without GSO (which was really useful because TSO turned out to be buggy :) Signed-off-by: Rusty Russell (remove MTU setting) --- drivers/net/virtio_net.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 06d5c43bb20..ce37a7e9541 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -19,6 +19,7 @@ //#define DEBUG #include #include +#include #include #include #include @@ -408,6 +409,22 @@ static int virtnet_close(struct net_device *dev) return 0; } +static int virtnet_set_tx_csum(struct net_device *dev, u32 data) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct virtio_device *vdev = vi->vdev; + + if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) + return -ENOSYS; + + return ethtool_op_set_tx_hw_csum(dev, data); +} + +static struct ethtool_ops virtnet_ethtool_ops = { + .set_tx_csum = virtnet_set_tx_csum, + .set_sg = ethtool_op_set_sg, +}; + static int virtnet_probe(struct virtio_device *vdev) { int err; @@ -427,6 +444,7 @@ static int virtnet_probe(struct virtio_device *vdev) #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = virtnet_netpoll; #endif + SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops); SET_NETDEV_DEV(dev, &vdev->dev); /* Do we support "hardware" checksums? */ -- cgit v1.2.3-18-g5258 From 97402b96f87c6e32f75f1bffdd91a5ee144b679d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Apr 2008 11:24:27 +0800 Subject: virtio net: Allow receiving SG packets Finally this patch lets virtio_net receive GSO packets in addition to sending them. This can definitely be optimised for the non-GSO case. For comparison the Xen approach stores one page in each skb and uses subsequent skb's pages to construct an SG skb instead of preallocating the maximum amount of pages per skb. Signed-off-by: Rusty Russell (added feature bits) --- drivers/net/virtio_net.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ce37a7e9541..0886b8a2d92 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -55,6 +55,9 @@ struct virtnet_info struct tasklet_struct tasklet; bool free_in_tasklet; + /* I like... big packets and I cannot lie! */ + bool big_packets; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; @@ -89,6 +92,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); + int err; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); @@ -96,10 +100,14 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, goto drop; } len -= sizeof(struct virtio_net_hdr); - BUG_ON(len > MAX_PACKET_LEN); - - skb_trim(skb, len); + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); + dev->stats.rx_dropped++; + goto drop; + } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -161,7 +169,7 @@ static void try_fill_recv(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; - int num, err; + int num, err, i; sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { @@ -171,6 +179,24 @@ static void try_fill_recv(struct virtnet_info *vi) skb_put(skb, MAX_PACKET_LEN); vnet_hdr_to_sg(sg, skb); + + if (vi->big_packets) { + for (i = 0; i < MAX_SKB_FRAGS; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + f->page = alloc_page(GFP_ATOMIC); + if (!f->page) + break; + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb->data_len += PAGE_SIZE; + skb->len += PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + } + } + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; skb_queue_head(&vi->recv, skb); @@ -485,6 +511,12 @@ static int virtnet_probe(struct virtio_device *vdev) * the timer. */ vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY); + /* If we can receive ANY GSO packets, we must allocate large ones. */ + if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) + || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + vi->big_packets = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -571,7 +603,9 @@ static unsigned int features[] = { VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, - VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_F_NOTIFY_ON_EMPTY, }; static struct virtio_driver virtio_net = { -- cgit v1.2.3-18-g5258 From fb6813f480806d62361719e84777c8e00d3e86a8 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:01 -0500 Subject: virtio: Recycle unused recv buffer pages for large skbs in net driver If we hack the virtio_net driver to always allocate full-sized (64k+) skbuffs, the driver slows down (lguest numbers): Time to receive 1GB (small buffers): 10.85 seconds Time to receive 1GB (64k+ buffers): 24.75 seconds Of course, large buffers use up more space in the ring, so we increase that from 128 to 2048: Time to receive 1GB (64k+ buffers, 2k ring): 16.61 seconds If we recycle pages rather than using alloc_page/free_page: Time to receive 1GB (64k+ buffers, 2k ring, recycle pages): 10.81 seconds This demonstrates that with efficient allocation, we don't need to have a separate "small buffer" queue. Signed-off-by: Rusty Russell --- drivers/net/virtio_net.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0886b8a2d92..0196a0df902 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -61,6 +61,9 @@ struct virtnet_info /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; + + /* Chain pages by the private ptr. */ + struct page *pages; }; static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) @@ -73,6 +76,23 @@ static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); } +static void give_a_page(struct virtnet_info *vi, struct page *page) +{ + page->private = (unsigned long)vi->pages; + vi->pages = page; +} + +static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask) +{ + struct page *p = vi->pages; + + if (p) + vi->pages = (struct page *)p->private; + else + p = alloc_page(gfp_mask); + return p; +} + static void skb_xmit_done(struct virtqueue *svq) { struct virtnet_info *vi = svq->vdev->priv; @@ -101,6 +121,15 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb, } len -= sizeof(struct virtio_net_hdr); + if (len <= MAX_PACKET_LEN) { + unsigned int i; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) + give_a_page(dev->priv, skb_shinfo(skb)->frags[i].page); + skb->data_len = 0; + skb_shinfo(skb)->nr_frags = 0; + } + err = pskb_trim(skb, len); if (err) { pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); @@ -183,7 +212,7 @@ static void try_fill_recv(struct virtnet_info *vi) if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { skb_frag_t *f = &skb_shinfo(skb)->frags[i]; - f->page = alloc_page(GFP_ATOMIC); + f->page = get_a_page(vi, GFP_ATOMIC); if (!f->page) break; @@ -506,6 +535,7 @@ static int virtnet_probe(struct virtio_device *vdev) vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; + vi->pages = NULL; /* If they give us a callback when all buffers are done, we don't need * the timer. */ @@ -591,6 +621,10 @@ static void virtnet_remove(struct virtio_device *vdev) vdev->config->del_vq(vi->svq); vdev->config->del_vq(vi->rvq); unregister_netdev(vi->dev); + + while (vi->pages) + __free_pages(get_a_page(vi, GFP_KERNEL), 0); + free_netdev(vi->dev); } -- cgit v1.2.3-18-g5258 From 674bfc23c585b34c42263d73fb51710d49762a23 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:03 -0500 Subject: virtio: clarify that ABI is usable by any implementations We want others to implement and use virtio, so it makes sense to BSD license the non-__KERNEL__ parts of the headers to make this crystal clear. Signed-off-by: Rusty Russell Acked-by: Christian Borntraeger Acked-by: Mark McLoughlin Acked-by: Ryan Harper Acked-by: Eric Van Hensbergen Acked-by: Anthony Liguori --- include/linux/virtio_9p.h | 2 ++ include/linux/virtio_balloon.h | 2 ++ include/linux/virtio_blk.h | 2 ++ include/linux/virtio_config.h | 3 +++ include/linux/virtio_console.h | 2 ++ include/linux/virtio_net.h | 2 ++ include/linux/virtio_pci.h | 5 ++--- include/linux/virtio_rng.h | 2 ++ 8 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index 8eff0b53910..b3c4a60ceeb 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio console */ diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index 979524ee75b..c30c7bfbf39 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_balloon */ diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 5f79a5f9de7..6a66c7f30bc 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_block */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index f364bbf63c3..7eb4b34d13b 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,5 +1,8 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ + /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can * store and access that space differently. */ diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index ed2d4ead7eb..19a0da0dba4 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -1,6 +1,8 @@ #ifndef _LINUX_VIRTIO_CONSOLE_H #define _LINUX_VIRTIO_CONSOLE_H #include +/* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so + * anyone can use the definitions to implement compatible drivers/servers. */ /* The ID for virtio console */ #define VIRTIO_ID_CONSOLE 3 diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 38c0571820f..5e33761b9b8 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_net */ diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index b3151659cf4..cdef3574293 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -9,9 +9,8 @@ * Authors: * Anthony Liguori * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_rng.h b/include/linux/virtio_rng.h index 331afb6c9f6..1a85dab8a94 100644 --- a/include/linux/virtio_rng.h +++ b/include/linux/virtio_rng.h @@ -1,5 +1,7 @@ #ifndef _LINUX_VIRTIO_RNG_H #define _LINUX_VIRTIO_RNG_H +/* This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. */ #include /* The ID for virtio_rng */ -- cgit v1.2.3-18-g5258 From 44653eae1407f79dff6f52fcf594ae84cb165ec4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:04 -0500 Subject: virtio: don't always force a notification when ring is full We force notification when the ring is full, even if the host has indicated it doesn't want to know. This seemed like a good idea at the time: if we fill the transmit ring, we should tell the host immediately. Unfortunately this logic also applies to the receiving ring, which is refilled constantly. We should introduce real notification thesholds to replace this logic. Meanwhile, removing the logic altogether breaks the heuristics which KVM uses, so we use a hack: only notify if there are outgoing parts of the new buffer. Here are the number of exits with lguest's crappy network implementation: Before: network xmit 7859051 recv 236420 After: network xmit 7858610 recv 118136 Signed-off-by: Rusty Russell --- drivers/virtio/virtio_ring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 72bf8bc0901..21d9a62767a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -87,8 +87,11 @@ static int vring_add_buf(struct virtqueue *_vq, if (vq->num_free < out + in) { pr_debug("Can't add buf len %i - avail = %i\n", out + in, vq->num_free); - /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */ - vq->notify(&vq->vq); + /* FIXME: for historical reasons, we force a notify here if + * there are outgoing parts to the buffer. Presumably the + * host should service the ring ASAP. */ + if (out) + vq->notify(&vq->vq); END_USE(vq); return -ENOSPC; } -- cgit v1.2.3-18-g5258 From e962fa660d391fc9b90988e6538c94c858c099f9 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Fri, 13 Jun 2008 13:46:40 +0100 Subject: virtio: Use bus_type probe and remove methods Hook up to the probe() and remove() methods in bus_type rather than device_driver. The latter has been preferred since 2.6.16. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7084e7e146c..fc85cba6457 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -71,13 +71,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env) dev->id.device, dev->id.vendor); } -static struct bus_type virtio_bus = { - .name = "virtio", - .match = virtio_dev_match, - .dev_attrs = virtio_dev_attrs, - .uevent = virtio_uevent, -}; - static void add_status(struct virtio_device *dev, unsigned status) { dev->config->set_status(dev, dev->config->get_status(dev) | status); @@ -147,13 +140,20 @@ static int virtio_dev_remove(struct device *_d) return 0; } +static struct bus_type virtio_bus = { + .name = "virtio", + .match = virtio_dev_match, + .dev_attrs = virtio_dev_attrs, + .uevent = virtio_uevent, + .probe = virtio_dev_probe, + .remove = virtio_dev_remove, +}; + int register_virtio_driver(struct virtio_driver *driver) { /* Catch this early. */ BUG_ON(driver->feature_table_size && !driver->feature_table); driver->driver.bus = &virtio_bus; - driver->driver.probe = virtio_dev_probe; - driver->driver.remove = virtio_dev_remove; return driver_register(&driver->driver); } EXPORT_SYMBOL_GPL(register_virtio_driver); -- cgit v1.2.3-18-g5258 From 066f4d82a67f621ddd547bfa4b9c94631d8457b0 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 29 May 2008 11:08:26 +0200 Subject: virtio_blk: check for hardsector size from host Currently virtio_blk assumes a 512 byte hard sector size. This can cause trouble / performance issues if the backing has a different block size (like a file on an ext3 file system formatted with 4k block size or a dasd). Lets add a feature flag that tells the guest to use a different hard sector size than 512 byte. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/block/virtio_blk.c | 10 +++++++++- include/linux/virtio_blk.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dd7ea203f94..42251095134 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -196,6 +196,7 @@ static int virtblk_probe(struct virtio_device *vdev) int err; u64 cap; u32 v; + u32 blk_size; if (index_to_minor(index) >= 1 << MINORBITS) return -ENOSPC; @@ -290,6 +291,13 @@ static int virtblk_probe(struct virtio_device *vdev) if (!err) blk_queue_max_hw_segments(vblk->disk->queue, v); + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_hardsect_size(vblk->disk->queue, blk_size); + add_disk(vblk->disk); return 0; @@ -330,7 +338,7 @@ static struct virtio_device_id id_table[] = { static unsigned int features[] = { VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, - VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, }; static struct virtio_driver virtio_blk = { diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 6a66c7f30bc..c1aef85243b 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -13,6 +13,7 @@ #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ #define VIRTIO_BLK_F_GEOMETRY 4 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/ struct virtio_blk_config { @@ -28,6 +29,8 @@ struct virtio_blk_config __u8 heads; __u8 sectors; } geometry; + /* block size of device (if VIRTIO_BLK_F_BLK_SIZE) */ + __u32 blk_size; } __attribute__((packed)); /* These two define direction. */ -- cgit v1.2.3-18-g5258 From 611e097d7707741a336a0677d9d69bec40f29f3d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 20 Jun 2008 15:24:08 +0200 Subject: hvc_console: rework setup to replace irq functions with callbacks This patch tries to change hvc_console to not use request_irq/free_irq if the backend does not use irqs. This allows virtio_console to use hvc_console without having a linker reference to request_irq/free_irq. In addition, together with patch 2/3 it improves the performance for virtio console input. (an earlier version of this patch was tested by Yajin on lguest) The irq specific code is moved to hvc_irq.c and selected by the drivers that use irqs (System p, System i, XEN). I replaced "int irq" with the opaque "int data". The request_irq and free_irq calls are replaced with notifier_add and notifier_del. I have also changed the code a bit to call the notifier_add and notifier_del inside the spinlock area as the callbacks are found via hp->ops. Changes since last version: o remove ifdef o reintroduce "irq_requested" as "notified" o cleanups, sparse.. I did not move the timer based polling into a separate polling scheme. I played with several variants, but it seems we need to sleep/schedule in a thread even for irq based consoles, as there are throttleing and buffer size constraints. I also kept hvc_struct defined in hvc_console.h so that hvc_irq.c can access the irq_requested element. Feedback is appreciated. virtio_console is currently the only available console for kvm on s390. I plan to push this change as soon as all affected parties agree on it. I would love to get test results from System p, Xen etc. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/char/Kconfig | 5 +++ drivers/char/Makefile | 1 + drivers/char/hvc_console.c | 81 +++++++++++----------------------------------- drivers/char/hvc_console.h | 35 +++++++++++++++++--- drivers/char/hvc_irq.c | 44 +++++++++++++++++++++++++ drivers/char/hvc_iseries.c | 2 ++ drivers/char/hvc_vio.c | 2 ++ drivers/char/hvc_xen.c | 2 ++ 8 files changed, 105 insertions(+), 67 deletions(-) create mode 100644 drivers/char/hvc_irq.c diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 67b07576f8b..d825361a6ba 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -578,11 +578,14 @@ config HVC_DRIVER It will automatically be selected if one of the back-end console drivers is selected. +config HVC_IRQ + bool config HVC_CONSOLE bool "pSeries Hypervisor Virtual Console support" depends on PPC_PSERIES select HVC_DRIVER + select HVC_IRQ help pSeries machines when partitioned support a hypervisor virtual console. This driver allows each pSeries partition to have a console @@ -593,6 +596,7 @@ config HVC_ISERIES depends on PPC_ISERIES default y select HVC_DRIVER + select HVC_IRQ help iSeries machines support a hypervisor virtual console. @@ -614,6 +618,7 @@ config HVC_XEN bool "Xen Hypervisor Console support" depends on XEN select HVC_DRIVER + select HVC_IRQ default y help Xen virtual console device driver diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 4b6e736cfa0..eb02c350680 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_HVC_ISERIES) += hvc_iseries.o obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o obj-$(CONFIG_HVC_BEAT) += hvc_beat.o obj-$(CONFIG_HVC_DRIVER) += hvc_console.o +obj-$(CONFIG_HVC_IRQ) += hvc_irq.o obj-$(CONFIG_HVC_XEN) += hvc_xen.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o obj-$(CONFIG_RAW_DRIVER) += raw.o diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 2f9759d625c..2f5b7fb6704 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -75,23 +74,6 @@ static int hvc_init(void); static int sysrq_pressed; #endif -struct hvc_struct { - spinlock_t lock; - int index; - struct tty_struct *tty; - unsigned int count; - int do_wakeup; - char *outbuf; - int outbuf_size; - int n_outbuf; - uint32_t vtermno; - struct hv_ops *ops; - int irq_requested; - int irq; - struct list_head next; - struct kref kref; /* ref count & hvc_struct lifetime */ -}; - /* dynamic list of hvc_struct instances */ static LIST_HEAD(hvc_structs); @@ -300,26 +282,12 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) } /* Wake the sleeping khvcd */ -static void hvc_kick(void) +void hvc_kick(void) { hvc_kicked = 1; wake_up_process(hvc_task); } -static int hvc_poll(struct hvc_struct *hp); - -/* - * NOTE: This API isn't used if the console adapter doesn't support interrupts. - * In this case the console is poll driven. - */ -static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) -{ - /* if hvc_poll request a repoll, then kick the hvcd thread */ - if (hvc_poll(dev_instance)) - hvc_kick(); - return IRQ_HANDLED; -} - static void hvc_unthrottle(struct tty_struct *tty) { hvc_kick(); @@ -333,7 +301,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; unsigned long flags; - int irq = 0; int rc = 0; /* Auto increments kref reference if found. */ @@ -352,18 +319,15 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ hp->tty = tty; - /* Save for request_irq outside of spin_lock. */ - irq = hp->irq; - if (irq) - hp->irq_requested = 1; + + if (hp->ops->notifier_add) + rc = hp->ops->notifier_add(hp, hp->data); spin_unlock_irqrestore(&hp->lock, flags); - /* check error, fallback to non-irq */ - if (irq) - rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, "hvc_console", hp); + /* - * If the request_irq() fails and we return an error. The tty layer + * If the notifier fails we return an error. The tty layer * will call hvc_close() after a failed open but we don't want to clean * up there so we'll clean up here and clear out the previously set * tty fields and return the kref reference. @@ -371,7 +335,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) if (rc) { spin_lock_irqsave(&hp->lock, flags); hp->tty = NULL; - hp->irq_requested = 0; spin_unlock_irqrestore(&hp->lock, flags); tty->driver_data = NULL; kref_put(&hp->kref, destroy_hvc_struct); @@ -386,7 +349,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) static void hvc_close(struct tty_struct *tty, struct file * filp) { struct hvc_struct *hp; - int irq = 0; unsigned long flags; if (tty_hung_up_p(filp)) @@ -404,9 +366,8 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) spin_lock_irqsave(&hp->lock, flags); if (--hp->count == 0) { - if (hp->irq_requested) - irq = hp->irq; - hp->irq_requested = 0; + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); /* We are done with the tty pointer now. */ hp->tty = NULL; @@ -418,10 +379,6 @@ static void hvc_close(struct tty_struct *tty, struct file * filp) * waking periodically to check chars_in_buffer(). */ tty_wait_until_sent(tty, HVC_CLOSE_WAIT); - - if (irq) - free_irq(irq, hp); - } else { if (hp->count < 0) printk(KERN_ERR "hvc_close %X: oops, count is %d\n", @@ -436,7 +393,6 @@ static void hvc_hangup(struct tty_struct *tty) { struct hvc_struct *hp = tty->driver_data; unsigned long flags; - int irq = 0; int temp_open_count; if (!hp) @@ -458,13 +414,12 @@ static void hvc_hangup(struct tty_struct *tty) hp->count = 0; hp->n_outbuf = 0; hp->tty = NULL; - if (hp->irq_requested) - /* Saved for use outside of spin_lock. */ - irq = hp->irq; - hp->irq_requested = 0; + + if (hp->ops->notifier_del) + hp->ops->notifier_del(hp, hp->data); + spin_unlock_irqrestore(&hp->lock, flags); - if (irq) - free_irq(irq, hp); + while(temp_open_count) { --temp_open_count; kref_put(&hp->kref, destroy_hvc_struct); @@ -575,7 +530,7 @@ static u32 timeout = MIN_TIMEOUT; #define HVC_POLL_READ 0x00000001 #define HVC_POLL_WRITE 0x00000002 -static int hvc_poll(struct hvc_struct *hp) +int hvc_poll(struct hvc_struct *hp) { struct tty_struct *tty; int i, n, poll_mask = 0; @@ -602,10 +557,10 @@ static int hvc_poll(struct hvc_struct *hp) if (test_bit(TTY_THROTTLED, &tty->flags)) goto throttled; - /* If we aren't interrupt driven and aren't throttled, we always + /* If we aren't notifier driven and aren't throttled, we always * request a reschedule */ - if (hp->irq == 0) + if (!hp->irq_requested) poll_mask |= HVC_POLL_READ; /* Read data if any */ @@ -733,7 +688,7 @@ static const struct tty_operations hvc_ops = { .chars_in_buffer = hvc_chars_in_buffer, }; -struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, +struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size) { struct hvc_struct *hp; @@ -754,7 +709,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int irq, memset(hp, 0x00, sizeof(*hp)); hp->vtermno = vtermno; - hp->irq = irq; + hp->data = data; hp->ops = ops; hp->outbuf_size = outbuf_size; hp->outbuf = &((char *)hp)[ALIGN(sizeof(*hp), sizeof(long))]; diff --git a/drivers/char/hvc_console.h b/drivers/char/hvc_console.h index 42ffb17e15d..d9ce1091562 100644 --- a/drivers/char/hvc_console.h +++ b/drivers/char/hvc_console.h @@ -26,6 +26,7 @@ #ifndef HVC_CONSOLE_H #define HVC_CONSOLE_H +#include /* * This is the max number of console adapters that can/will be found as @@ -42,24 +43,50 @@ */ #define HVC_ALLOC_TTY_ADAPTERS 8 +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + unsigned int count; + int do_wakeup; + char *outbuf; + int outbuf_size; + int n_outbuf; + uint32_t vtermno; + struct hv_ops *ops; + int irq_requested; + int data; + struct list_head next; + struct kref kref; /* ref count & hvc_struct lifetime */ +}; /* implemented by a low level driver */ struct hv_ops { int (*get_chars)(uint32_t vtermno, char *buf, int count); int (*put_chars)(uint32_t vtermno, const char *buf, int count); -}; -struct hvc_struct; + /* Callbacks for notification. Called in open and close */ + int (*notifier_add)(struct hvc_struct *hp, int irq); + void (*notifier_del)(struct hvc_struct *hp, int irq); +}; /* Register a vterm and a slot index for use as a console (console_init) */ extern int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops); /* register a vterm for hvc tty operation (module_init or hotplug add) */ -extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int irq, +extern struct hvc_struct * __devinit hvc_alloc(uint32_t vtermno, int data, struct hv_ops *ops, int outbuf_size); -/* remove a vterm from hvc tty operation (modele_exit or hotplug remove) */ +/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ extern int __devexit hvc_remove(struct hvc_struct *hp); +/* data available */ +int hvc_poll(struct hvc_struct *hp); +void hvc_kick(void); + +/* default notifier for irq based notification */ +extern int notifier_add_irq(struct hvc_struct *hp, int data); +extern void notifier_del_irq(struct hvc_struct *hp, int data); + #if defined(CONFIG_XMON) && defined(CONFIG_SMP) #include diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c new file mode 100644 index 00000000000..73a59cdb894 --- /dev/null +++ b/drivers/char/hvc_irq.c @@ -0,0 +1,44 @@ +/* + * Copyright IBM Corp. 2001,2008 + * + * This file contains the IRQ specific code for hvc_console + * + */ + +#include + +#include "hvc_console.h" + +static irqreturn_t hvc_handle_interrupt(int irq, void *dev_instance) +{ + /* if hvc_poll request a repoll, then kick the hvcd thread */ + if (hvc_poll(dev_instance)) + hvc_kick(); + return IRQ_HANDLED; +} + +/* + * For IRQ based systems these callbacks can be used + */ +int notifier_add_irq(struct hvc_struct *hp, int irq) +{ + int rc; + + if (!irq) { + hp->irq_requested = 0; + return 0; + } + rc = request_irq(irq, hvc_handle_interrupt, IRQF_DISABLED, + "hvc_console", hp); + if (!rc) + hp->irq_requested = 1; + return rc; +} + +void notifier_del_irq(struct hvc_struct *hp, int irq) +{ + if (!irq) + return; + free_irq(irq, hp); + hp->irq_requested = 0; +} diff --git a/drivers/char/hvc_iseries.c b/drivers/char/hvc_iseries.c index a08f8f981c1..b71c610fe5a 100644 --- a/drivers/char/hvc_iseries.c +++ b/drivers/char/hvc_iseries.c @@ -200,6 +200,8 @@ done: static struct hv_ops hvc_get_put_ops = { .get_chars = get_chars, .put_chars = put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 79711aa4b41..93f3840c168 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -80,6 +80,8 @@ static int filtered_get_chars(uint32_t vtermno, char *buf, int count) static struct hv_ops hvc_get_put_ops = { .get_chars = filtered_get_chars, .put_chars = hvc_put_chars, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __devinit hvc_vio_probe(struct vio_dev *vdev, diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index db2ae421627..6b70aa66a58 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -100,6 +100,8 @@ static int read_console(uint32_t vtermno, char *buf, int len) static struct hv_ops hvc_ops = { .get_chars = read_console, .put_chars = write_console, + .notifier_add = notifier_add_irq, + .notifier_del = notifier_del_irq, }; static int __init xen_init(void) -- cgit v1.2.3-18-g5258 From 91fcad19d03ed67cb50fd0e1913a8b89cc3ed3ec Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 20 Jun 2008 15:24:15 +0200 Subject: virtio_console: use virtqueue notification for hvc_console This patch exploits the new notifier callbacks of the hvc_console. We can use the virtio callbacks instead of the polling code. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- drivers/char/virtio_console.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index dc17fe3a88b..d0f4eb6fdb7 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -46,6 +46,9 @@ static char *in, *inbuf; /* The operations for our console. */ static struct hv_ops virtio_cons; +/* The hvc device */ +static struct hvc_struct *hvc; + /*D:310 The put_chars() callback is pretty straightforward. * * We turn the characters into a scatter-gather list, add it to the output @@ -134,6 +137,27 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) return hvc_instantiate(0, 0, &virtio_cons); } +/* + * we support only one console, the hvc struct is a global var + * There is no need to do anything + */ +static int notifier_add_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 1; + return 0; +} + +static void notifier_del_vio(struct hvc_struct *hp, int data) +{ + hp->irq_requested = 0; +} + +static void hvc_handle_input(struct virtqueue *vq) +{ + if (hvc_poll(hvc)) + hvc_kick(); +} + /*D:370 Once we're further in boot, we get probed like any other virtio device. * At this stage we set up the output virtqueue. * @@ -144,7 +168,6 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int)) static int __devinit virtcons_probe(struct virtio_device *dev) { int err; - struct hvc_struct *hvc; vdev = dev; @@ -158,7 +181,7 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Find the input queue. */ /* FIXME: This is why we want to wean off hvc: we do nothing * when input comes in. */ - in_vq = vdev->config->find_vq(vdev, 0, NULL); + in_vq = vdev->config->find_vq(vdev, 0, hvc_handle_input); if (IS_ERR(in_vq)) { err = PTR_ERR(in_vq); goto free; @@ -173,15 +196,18 @@ static int __devinit virtcons_probe(struct virtio_device *dev) /* Start using the new console output. */ virtio_cons.get_chars = get_chars; virtio_cons.put_chars = put_chars; + virtio_cons.notifier_add = notifier_add_vio; + virtio_cons.notifier_del = notifier_del_vio; /* The first argument of hvc_alloc() is the virtual console number, so - * we use zero. The second argument is the interrupt number; we - * currently leave this as zero: it would be better not to use the - * hvc mechanism and fix this (FIXME!). + * we use zero. The second argument is the parameter for the + * notification mechanism (like irq number). We currently leave this + * as zero, virtqueues have implicit notifications. * * The third argument is a "struct hv_ops" containing the put_chars() - * and get_chars() pointers. The final argument is the output buffer - * size: we can do any size, so we put PAGE_SIZE here. */ + * get_chars(), notifier_add() and notifier_del() pointers. + * The final argument is the output buffer size: we can do any size, + * so we put PAGE_SIZE here. */ hvc = hvc_alloc(0, 0, &virtio_cons, PAGE_SIZE); if (IS_ERR(hvc)) { err = PTR_ERR(hvc); -- cgit v1.2.3-18-g5258 From 7721c494a28e06543a3d6aa412957aa783a4a531 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2008 12:06:06 -0500 Subject: virtio: console as a config option I also added a small Kconfig change that allows the user to specify the virtio console in menuconfig. (Fixes to export symbols from Stephen Rothwell ) (Fixes for CONFIG_VIRTIO_CONSOLE=y vs CONFIG_VIRTIO=m from Christian himself) Signed-off-by: Rusty Russell Cc: Stephen Rothwell --- drivers/char/Kconfig | 6 +++++- drivers/char/hvc_console.c | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index d825361a6ba..6c070dc5f2d 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -624,8 +624,12 @@ config HVC_XEN Xen virtual console device driver config VIRTIO_CONSOLE - bool + tristate "Virtio console" + depends on VIRTIO select HVC_DRIVER + help + Virtio console for use with lguest and other hypervisors. + config HVCS tristate "IBM Hypervisor Virtual Console Server support" diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 2f5b7fb6704..02aac104842 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -280,6 +280,7 @@ int hvc_instantiate(uint32_t vtermno, int index, struct hv_ops *ops) return 0; } +EXPORT_SYMBOL_GPL(hvc_instantiate); /* Wake the sleeping khvcd */ void hvc_kick(void) @@ -287,6 +288,7 @@ void hvc_kick(void) hvc_kicked = 1; wake_up_process(hvc_task); } +EXPORT_SYMBOL_GPL(hvc_kick); static void hvc_unthrottle(struct tty_struct *tty) { @@ -629,6 +631,7 @@ int hvc_poll(struct hvc_struct *hp) return poll_mask; } +EXPORT_SYMBOL_GPL(hvc_poll); /* * This kthread is either polling or interrupt driven. This is determined by @@ -739,6 +742,7 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, return hp; } +EXPORT_SYMBOL_GPL(hvc_alloc); int __devexit hvc_remove(struct hvc_struct *hp) { -- cgit v1.2.3-18-g5258 From faeba830b086bc9e58748869054e994cb09693cd Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 20 Jun 2008 15:24:18 +0200 Subject: s390: use virtio_console for KVM on s390 This patch enables virtio_console as the default console on kvm for s390. We currently use the same notify hack as lguest for early console output. I will try to address this for lguest and s390 later. Signed-off-by: Christian Borntraeger Signed-off-by: Rusty Russell --- arch/s390/Kconfig | 1 + arch/s390/kernel/setup.c | 4 +++- drivers/s390/kvm/kvm_virtio.c | 20 ++++++++++++++++++++ include/asm-s390/kvm_virtio.h | 10 ++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index eb530b4128b..2ed88122be9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -565,6 +565,7 @@ bool "s390 guest support (EXPERIMENTAL)" depends on 64BIT && EXPERIMENTAL select VIRTIO select VIRTIO_RING + select VIRTIO_CONSOLE help Select this option if you want to run the kernel under s390 linux endmenu diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b358e18273b..62122bad1e3 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -54,6 +54,7 @@ #include #include #include +#include long psw_kernel_bits = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY | PSW_MASK_MCHECK | PSW_DEFAULT_KEY); @@ -766,7 +767,8 @@ setup_arch(char **cmdline_p) printk("We are running under VM (64 bit mode)\n"); else if (MACHINE_IS_KVM) { printk("We are running under KVM (64 bit mode)\n"); - add_preferred_console("ttyS", 1, NULL); + add_preferred_console("hvc", 0, NULL); + s390_virtio_console_init(); } else printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5ab34340919..d41f234bb2c 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -333,6 +334,25 @@ static int __init kvm_devices_init(void) return 0; } +/* code for early console output with virtio_console */ +static __init int early_put_chars(u32 vtermno, const char *buf, int count) +{ + char scratch[17]; + unsigned int len = count; + + if (len > sizeof(scratch) - 1) + len = sizeof(scratch) - 1; + scratch[len] = '\0'; + memcpy(scratch, buf, len); + kvm_hypercall1(KVM_S390_VIRTIO_NOTIFY, __pa(scratch)); + return len; +} + +void s390_virtio_console_init(void) +{ + virtio_cons_early_init(early_put_chars); +} + /* * We do this after core stuff, but before the drivers. */ diff --git a/include/asm-s390/kvm_virtio.h b/include/asm-s390/kvm_virtio.h index 5c871a990c2..146100224de 100644 --- a/include/asm-s390/kvm_virtio.h +++ b/include/asm-s390/kvm_virtio.h @@ -50,4 +50,14 @@ struct kvm_vqconfig { #define KVM_S390_VIRTIO_RESET 1 #define KVM_S390_VIRTIO_SET_STATUS 2 +#ifdef __KERNEL__ +/* early virtio console setup */ +#ifdef CONFIG_VIRTIO_CONSOLE +extern void s390_virtio_console_init(void); +#else +static inline void s390_virtio_console_init(void) +{ +} +#endif /* CONFIG_VIRTIO_CONSOLE */ +#endif /* __KERNEL__ */ #endif -- cgit v1.2.3-18-g5258 From dd7c7bc46211785a1aa7d70feb15830f62682b3c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:07 -0500 Subject: virtio: Formally reserve bits 28-31 to be 'transport' features. We assign feature bits as required, but it makes sense to reserve some for the particular transport, rather than the particular device. Signed-off-by: Rusty Russell --- drivers/virtio/virtio.c | 5 +++++ include/linux/virtio_config.h | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index fc85cba6457..baf103361e3 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -113,6 +113,11 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } + /* Transport features are always preserved to pass to set_features. */ + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) + if (device_features & (1 << i)) + set_bit(i, dev->features); + err = drv->probe(dev); if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 7eb4b34d13b..5a30cfb7934 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -18,6 +18,12 @@ /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 +/* Some virtio feature bits (currently bits 28 through 31) are reserved for the + * transport being used (eg. virtio_ring), the rest are per-device feature + * bits. */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + /* Do we get callbacks when the ring is completely used, even if we've * suppressed them? */ #define VIRTIO_F_NOTIFY_ON_EMPTY 24 -- cgit v1.2.3-18-g5258 From c624896e488ba2bff5ae497782cfb265c8b00646 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:07 -0500 Subject: virtio: Rename set_features to finalize_features Rather than explicitly handing the features to the lower-level, we just hand the virtio_device and have it set the features. This make it clear that it has the chance to manipulate the features of the device at this point (and that all feature negotiation is already done). Signed-off-by: Rusty Russell --- drivers/lguest/lguest_device.c | 11 ++++++----- drivers/s390/kvm/kvm_virtio.c | 11 ++++++----- drivers/virtio/virtio.c | 5 ++--- drivers/virtio/virtio_pci.c | 10 ++++++---- include/linux/virtio_config.h | 7 ++++--- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1a8de57289e..54fdc2aa480 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -98,16 +98,17 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -static void lg_set_features(struct virtio_device *vdev, u32 features) +static void lg_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct lguest_device_desc *desc = to_lgdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -297,7 +298,7 @@ static void lg_del_vq(struct virtqueue *vq) /* The ops structure which hooks everything together. */ static struct virtio_config_ops lguest_config_ops = { .get_features = lg_get_features, - .set_features = lg_set_features, + .finalize_features = lg_finalize_features, .get = lg_get, .set = lg_set, .get_status = lg_get_status, diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index d41f234bb2c..5953510e7d5 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -88,16 +88,17 @@ static u32 kvm_get_features(struct virtio_device *vdev) return features; } -static void kvm_set_features(struct virtio_device *vdev, u32 features) +static void kvm_finalize_features(struct virtio_device *vdev) { - unsigned int i; + unsigned int i, bits; struct kvm_device_desc *desc = to_kvmdev(vdev)->desc; /* Second half of bitmap is features we accept. */ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; memset(out_features, 0, desc->feature_len); - for (i = 0; i < min(desc->feature_len * 8, 32); i++) { - if (features & (1 << i)) + bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; + for (i = 0; i < bits; i++) { + if (test_bit(i, vdev->features)) out_features[i / 8] |= (1 << (i % 8)); } } @@ -223,7 +224,7 @@ static void kvm_del_vq(struct virtqueue *vq) */ static struct virtio_config_ops kvm_vq_configspace_ops = { .get_features = kvm_get_features, - .set_features = kvm_set_features, + .finalize_features = kvm_finalize_features, .get = kvm_get, .set = kvm_set, .get_status = kvm_get_status, diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index baf103361e3..5b78fd0aff0 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -113,7 +113,7 @@ static int virtio_dev_probe(struct device *_d) set_bit(f, dev->features); } - /* Transport features are always preserved to pass to set_features. */ + /* Transport features always preserved to pass to finalize_features. */ for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) if (device_features & (1 << i)) set_bit(i, dev->features); @@ -122,8 +122,7 @@ static int virtio_dev_probe(struct device *_d) if (err) add_status(dev, VIRTIO_CONFIG_S_FAILED); else { - /* They should never have set feature bits beyond 32 */ - dev->config->set_features(dev, dev->features[0]); + dev->config->finalize_features(dev); add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } return err; diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index eae7236310e..9855975a72a 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -94,12 +94,14 @@ static u32 vp_get_features(struct virtio_device *vdev) return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); } -/* virtio config->set_features() implementation */ -static void vp_set_features(struct virtio_device *vdev, u32 features) +/* virtio config->finalize_features() implementation */ +static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); + /* We only support 32 feature bits. */ + BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); + iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); } /* virtio config->get() implementation */ @@ -297,7 +299,7 @@ static struct virtio_config_ops virtio_pci_config_ops = { .find_vq = vp_find_vq, .del_vq = vp_del_vq, .get_features = vp_get_features, - .set_features = vp_set_features, + .finalize_features = vp_finalize_features, }; /* the PCI probing function */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 5a30cfb7934..bf8ec283b23 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -61,9 +61,10 @@ * @get_features: get the array of feature bits for this device. * vdev: the virtio_device * Returns the first 32 feature bits (all we currently need). - * @set_features: confirm what device features we'll be using. + * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * feature: the first 32 feature bits + * This gives the final feature bits for the device: it can change + * the dev->feature bits if it wants. */ struct virtio_config_ops { @@ -79,7 +80,7 @@ struct virtio_config_ops void (*callback)(struct virtqueue *)); void (*del_vq)(struct virtqueue *vq); u32 (*get_features)(struct virtio_device *vdev); - void (*set_features)(struct virtio_device *vdev, u32 features); + void (*finalize_features)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ -- cgit v1.2.3-18-g5258 From e34f87256794b87e7f4a8f1812538be7b7b5214c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Jul 2008 12:06:13 -0500 Subject: virtio: Add transport feature handling stub for virtio_ring. To prepare for virtio_ring transport feature bits, hook in a call in all the users to manipulate them. This currently just clears all the bits, since it doesn't understand any features. Signed-off-by: Rusty Russell --- drivers/lguest/lguest_device.c | 3 +++ drivers/s390/kvm/kvm_virtio.c | 3 +++ drivers/virtio/virtio_pci.c | 3 +++ drivers/virtio/virtio_ring.c | 16 ++++++++++++++++ include/linux/virtio_ring.h | 2 ++ 5 files changed, 27 insertions(+) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 54fdc2aa480..37344aaee22 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -105,6 +105,9 @@ static void lg_finalize_features(struct virtio_device *vdev) /* Second half of bitmap is features we accept. */ u8 *out_features = lg_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c index 5953510e7d5..79954bd6bfa 100644 --- a/drivers/s390/kvm/kvm_virtio.c +++ b/drivers/s390/kvm/kvm_virtio.c @@ -95,6 +95,9 @@ static void kvm_finalize_features(struct virtio_device *vdev) /* Second half of bitmap is features we accept. */ u8 *out_features = kvm_vq_features(desc) + desc->feature_len; + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 9855975a72a..c7dc37c7cce 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -99,6 +99,9 @@ static void vp_finalize_features(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + /* We only support 32 feature bits. */ BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); iowrite32(vdev->features[0], vp_dev->ioaddr+VIRTIO_PCI_GUEST_FEATURES); diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 21d9a62767a..6eb5303fed1 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #ifdef DEBUG @@ -323,4 +324,19 @@ void vring_del_virtqueue(struct virtqueue *vq) } EXPORT_SYMBOL_GPL(vring_del_virtqueue); +/* Manipulates transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev) +{ + unsigned int i; + + for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++) { + switch (i) { + default: + /* We don't understand this bit. */ + clear_bit(i, vdev->features); + } + } +} +EXPORT_SYMBOL_GPL(vring_transport_features); + MODULE_LICENSE("GPL"); diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index abe481ed990..c4a598fb382 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -120,6 +120,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, void (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq)); void vring_del_virtqueue(struct virtqueue *vq); +/* Filter out transport-specific feature bits. */ +void vring_transport_features(struct virtio_device *vdev); irqreturn_t vring_interrupt(int irq, void *_vq); #endif /* __KERNEL__ */ -- cgit v1.2.3-18-g5258