From 0acf10d8fbd52926217d3933d196b33fe2468f18 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:30:59 +0100 Subject: xen: add raw console write functions for debug Add a couple of functions which can write directly to the Xen console for debugging. This output ends up on the host's dom0 console (assuming it allows the domain to write there). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/xen/hvc-console.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/xen') diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 21c0ecfd786..efc3237ab99 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,4 +3,7 @@ extern struct console xenboot_console; +void xen_raw_console_write(const char *str); +void xen_raw_printk(const char *fmt, ...); + #endif /* XEN_HVC_CONSOLE_H */ -- cgit v1.2.3-70-g09d2 From 6ba0e7b36c7cc1745b3cbeda244d14edae3ad058 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:08 +0100 Subject: xen pvfb: Pointer z-axis (mouse wheel) support Add z-axis motion to pointer events. Backward compatible, because there's space for the z-axis in union xenkbd_in_event, and old backends zero it. Derived from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/57dfe0098000 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/1edfea26a2a9 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/c3ff0b26f664 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/input/xen-kbdfront.c | 8 +++++++- include/xen/interface/io/kbdif.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index 0f47f4697cd..9da4452786f 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -66,6 +66,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_MOTION: input_report_rel(dev, REL_X, event->motion.rel_x); input_report_rel(dev, REL_Y, event->motion.rel_y); + if (event->motion.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->motion.rel_z); break; case XENKBD_TYPE_KEY: dev = NULL; @@ -84,6 +87,9 @@ static irqreturn_t input_handler(int rq, void *dev_id) case XENKBD_TYPE_POS: input_report_abs(dev, ABS_X, event->pos.abs_x); input_report_abs(dev, ABS_Y, event->pos.abs_y); + if (event->pos.rel_z) + input_report_rel(dev, REL_WHEEL, + -event->pos.rel_z); break; } if (dev) @@ -152,7 +158,7 @@ static int __devinit xenkbd_probe(struct xenbus_device *dev, ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS); for (i = BTN_LEFT; i <= BTN_TASK; i++) set_bit(i, ptr->keybit); - ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y); + ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL); input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0); input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0); diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index fb97f4284ff..8066c7849fb 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -49,6 +49,7 @@ struct xenkbd_motion { uint8_t type; /* XENKBD_TYPE_MOTION */ int32_t rel_x; /* relative X motion */ int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ }; struct xenkbd_key { @@ -61,6 +62,7 @@ struct xenkbd_position { uint8_t type; /* XENKBD_TYPE_POS */ int32_t abs_x; /* absolute X position (in FB pixels) */ int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 -- cgit v1.2.3-70-g09d2 From e4dcff1f6e7582f76c2c9990b1d9111bbc8e26ef Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:11 +0100 Subject: xen pvfb: Dynamic mode support (screen resizing) The pvfb backend indicates dynamic mode support by creating node feature_resize with a non-zero value in its xenstore directory. xen-fbfront sends a resize notification event on mode change. Fully backwards compatible both ways. Framebuffer size and initial resolution can be controlled through kernel parameter xen_fbfront.video. The backend enforces a separate size limit, which it advertises in node videoram in its xenstore directory. xen-kbdfront gets the maximum screen resolution from nodes width and height in the backend's xenstore directory instead of hardcoding it. Additional goodie: support for larger framebuffers (512M on a 64-bit system with 4K pages). Changing the number of bits per pixels dynamically is not supported, yet. Ported from http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/92f7b3144f41 http://xenbits.xensource.com/linux-2.6.18-xen.hg?rev/bfc040135633 Signed-off-by: Pat Campbell Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/input/xen-kbdfront.c | 10 +++ drivers/video/xen-fbfront.c | 183 +++++++++++++++++++++++++++++++++------- include/xen/interface/io/fbif.h | 29 +++++-- 3 files changed, 188 insertions(+), 34 deletions(-) (limited to 'include/xen') diff --git a/drivers/input/xen-kbdfront.c b/drivers/input/xen-kbdfront.c index eaf69cf5b44..9ce3b3baf3a 100644 --- a/drivers/input/xen-kbdfront.c +++ b/drivers/input/xen-kbdfront.c @@ -300,6 +300,16 @@ InitWait: */ if (dev->state != XenbusStateConnected) goto InitWait; /* no InitWait seen yet, fudge it */ + + /* Set input abs params to match backend screen res */ + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "width", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0); + + if (xenbus_scanf(XBT_NIL, info->xbdev->otherend, + "height", "%d", &val) > 0) + input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0); + break; case XenbusStateClosing: diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c index 291eef69559..47ed39b52f9 100644 --- a/drivers/video/xen-fbfront.c +++ b/drivers/video/xen-fbfront.c @@ -43,23 +43,47 @@ struct xenfb_info { struct xenfb_page *page; unsigned long *mfns; int update_wanted; /* XENFB_TYPE_UPDATE wanted */ + int feature_resize; /* XENFB_TYPE_RESIZE ok */ + struct xenfb_resize resize; /* protected by resize_lock */ + int resize_dpy; /* ditto */ + spinlock_t resize_lock; struct xenbus_device *xbdev; }; -static u32 xenfb_mem_len = XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8; +#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8) + +enum { KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT }; +static int video[KPARAM_CNT] = { 2, XENFB_WIDTH, XENFB_HEIGHT }; +module_param_array(video, int, NULL, 0); +MODULE_PARM_DESC(video, + "Video memory size in MB, width, height in pixels (default 2,800,600)"); static void xenfb_make_preferred_console(void); static int xenfb_remove(struct xenbus_device *); -static void xenfb_init_shared_page(struct xenfb_info *); +static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *); static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *); static void xenfb_disconnect_backend(struct xenfb_info *); +static void xenfb_send_event(struct xenfb_info *info, + union xenfb_out_event *event) +{ + u32 prod; + + prod = info->page->out_prod; + /* caller ensures !xenfb_queue_full() */ + mb(); /* ensure ring space available */ + XENFB_OUT_RING_REF(info->page, prod) = *event; + wmb(); /* ensure ring contents visible */ + info->page->out_prod = prod + 1; + + notify_remote_via_irq(info->irq); +} + static void xenfb_do_update(struct xenfb_info *info, int x, int y, int w, int h) { union xenfb_out_event event; - u32 prod; memset(&event, 0, sizeof(event)); event.type = XENFB_TYPE_UPDATE; @@ -68,14 +92,19 @@ static void xenfb_do_update(struct xenfb_info *info, event.update.width = w; event.update.height = h; - prod = info->page->out_prod; /* caller ensures !xenfb_queue_full() */ - mb(); /* ensure ring space available */ - XENFB_OUT_RING_REF(info->page, prod) = event; - wmb(); /* ensure ring contents visible */ - info->page->out_prod = prod + 1; + xenfb_send_event(info, &event); +} - notify_remote_via_irq(info->irq); +static void xenfb_do_resize(struct xenfb_info *info) +{ + union xenfb_out_event event; + + memset(&event, 0, sizeof(event)); + event.resize = info->resize; + + /* caller ensures !xenfb_queue_full() */ + xenfb_send_event(info, &event); } static int xenfb_queue_full(struct xenfb_info *info) @@ -87,12 +116,28 @@ static int xenfb_queue_full(struct xenfb_info *info) return prod - cons == XENFB_OUT_RING_LEN; } +static void xenfb_handle_resize_dpy(struct xenfb_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&info->resize_lock, flags); + if (info->resize_dpy) { + if (!xenfb_queue_full(info)) { + info->resize_dpy = 0; + xenfb_do_resize(info); + } + } + spin_unlock_irqrestore(&info->resize_lock, flags); +} + static void xenfb_refresh(struct xenfb_info *info, int x1, int y1, int w, int h) { unsigned long flags; - int y2 = y1 + h - 1; int x2 = x1 + w - 1; + int y2 = y1 + h - 1; + + xenfb_handle_resize_dpy(info); if (!info->update_wanted) return; @@ -225,6 +270,57 @@ static ssize_t xenfb_write(struct fb_info *p, const char __user *buf, return res; } +static int +xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + int required_mem_len; + + xenfb_info = info->par; + + if (!xenfb_info->feature_resize) { + if (var->xres == video[KPARAM_WIDTH] && + var->yres == video[KPARAM_HEIGHT] && + var->bits_per_pixel == xenfb_info->page->depth) { + return 0; + } + return -EINVAL; + } + + /* Can't resize past initial width and height */ + if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT]) + return -EINVAL; + + required_mem_len = var->xres * var->yres * xenfb_info->page->depth / 8; + if (var->bits_per_pixel == xenfb_info->page->depth && + var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) && + required_mem_len <= info->fix.smem_len) { + var->xres_virtual = var->xres; + var->yres_virtual = var->yres; + return 0; + } + return -EINVAL; +} + +static int xenfb_set_par(struct fb_info *info) +{ + struct xenfb_info *xenfb_info; + unsigned long flags; + + xenfb_info = info->par; + + spin_lock_irqsave(&xenfb_info->resize_lock, flags); + xenfb_info->resize.type = XENFB_TYPE_RESIZE; + xenfb_info->resize.width = info->var.xres; + xenfb_info->resize.height = info->var.yres; + xenfb_info->resize.stride = info->fix.line_length; + xenfb_info->resize.depth = info->var.bits_per_pixel; + xenfb_info->resize.offset = 0; + xenfb_info->resize_dpy = 1; + spin_unlock_irqrestore(&xenfb_info->resize_lock, flags); + return 0; +} + static struct fb_ops xenfb_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, @@ -233,6 +329,8 @@ static struct fb_ops xenfb_fb_ops = { .fb_fillrect = xenfb_fillrect, .fb_copyarea = xenfb_copyarea, .fb_imageblit = xenfb_imageblit, + .fb_check_var = xenfb_check_var, + .fb_set_par = xenfb_set_par, }; static irqreturn_t xenfb_event_handler(int rq, void *dev_id) @@ -261,6 +359,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, { struct xenfb_info *info; struct fb_info *fb_info; + int fb_size; + int val; int ret; info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -268,18 +368,35 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure"); return -ENOMEM; } + + /* Limit kernel param videoram amount to what is in xenstore */ + if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) { + if (val < video[KPARAM_MEM]) + video[KPARAM_MEM] = val; + } + + /* If requested res does not fit in available memory, use default */ + fb_size = video[KPARAM_MEM] * 1024 * 1024; + if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH / 8 + > fb_size) { + video[KPARAM_WIDTH] = XENFB_WIDTH; + video[KPARAM_HEIGHT] = XENFB_HEIGHT; + fb_size = XENFB_DEFAULT_FB_LEN; + } + dev->dev.driver_data = info; info->xbdev = dev; info->irq = -1; info->x1 = info->y1 = INT_MAX; spin_lock_init(&info->dirty_lock); + spin_lock_init(&info->resize_lock); - info->fb = vmalloc(xenfb_mem_len); + info->fb = vmalloc(fb_size); if (info->fb == NULL) goto error_nomem; - memset(info->fb, 0, xenfb_mem_len); + memset(info->fb, 0, fb_size); - info->nr_pages = (xenfb_mem_len + PAGE_SIZE - 1) >> PAGE_SHIFT; + info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT; info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages); if (!info->mfns) @@ -290,8 +407,6 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, if (!info->page) goto error_nomem; - xenfb_init_shared_page(info); - /* abusing framebuffer_alloc() to allocate pseudo_palette */ fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL); if (fb_info == NULL) @@ -304,9 +419,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->screen_base = info->fb; fb_info->fbops = &xenfb_fb_ops; - fb_info->var.xres_virtual = fb_info->var.xres = info->page->width; - fb_info->var.yres_virtual = fb_info->var.yres = info->page->height; - fb_info->var.bits_per_pixel = info->page->depth; + fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH]; + fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT]; + fb_info->var.bits_per_pixel = XENFB_DEPTH; fb_info->var.red = (struct fb_bitfield){16, 8, 0}; fb_info->var.green = (struct fb_bitfield){8, 8, 0}; @@ -318,9 +433,9 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->var.vmode = FB_VMODE_NONINTERLACED; fb_info->fix.visual = FB_VISUAL_TRUECOLOR; - fb_info->fix.line_length = info->page->line_length; + fb_info->fix.line_length = fb_info->var.xres * XENFB_DEPTH / 8; fb_info->fix.smem_start = 0; - fb_info->fix.smem_len = xenfb_mem_len; + fb_info->fix.smem_len = fb_size; strcpy(fb_info->fix.id, "xen"); fb_info->fix.type = FB_TYPE_PACKED_PIXELS; fb_info->fix.accel = FB_ACCEL_NONE; @@ -337,6 +452,8 @@ static int __devinit xenfb_probe(struct xenbus_device *dev, fb_info->fbdefio = &xenfb_defio; fb_deferred_io_init(fb_info); + xenfb_init_shared_page(info, fb_info); + ret = register_framebuffer(fb_info); if (ret) { fb_deferred_io_cleanup(fb_info); @@ -389,7 +506,7 @@ static int xenfb_resume(struct xenbus_device *dev) struct xenfb_info *info = dev->dev.driver_data; xenfb_disconnect_backend(info); - xenfb_init_shared_page(info); + xenfb_init_shared_page(info, info->fb_info); return xenfb_connect_backend(dev, info); } @@ -417,20 +534,23 @@ static unsigned long vmalloc_to_mfn(void *address) return pfn_to_mfn(vmalloc_to_pfn(address)); } -static void xenfb_init_shared_page(struct xenfb_info *info) +static void xenfb_init_shared_page(struct xenfb_info *info, + struct fb_info *fb_info) { int i; + int epd = PAGE_SIZE / sizeof(info->mfns[0]); for (i = 0; i < info->nr_pages; i++) info->mfns[i] = vmalloc_to_mfn(info->fb + i * PAGE_SIZE); - info->page->pd[0] = vmalloc_to_mfn(info->mfns); - info->page->pd[1] = 0; - info->page->width = XENFB_WIDTH; - info->page->height = XENFB_HEIGHT; - info->page->depth = XENFB_DEPTH; - info->page->line_length = (info->page->depth / 8) * info->page->width; - info->page->mem_length = xenfb_mem_len; + for (i = 0; i * epd < info->nr_pages; i++) + info->page->pd[i] = vmalloc_to_mfn(&info->mfns[i * epd]); + + info->page->width = fb_info->var.xres; + info->page->height = fb_info->var.yres; + info->page->depth = fb_info->var.bits_per_pixel; + info->page->line_length = fb_info->fix.line_length; + info->page->mem_length = fb_info->fix.smem_len; info->page->in_cons = info->page->in_prod = 0; info->page->out_cons = info->page->out_prod = 0; } @@ -530,6 +650,11 @@ InitWait: val = 0; if (val) info->update_wanted = 1; + + if (xenbus_scanf(XBT_NIL, dev->otherend, + "feature-resize", "%d", &val) < 0) + val = 0; + info->feature_resize = val; break; case XenbusStateClosing: diff --git a/include/xen/interface/io/fbif.h b/include/xen/interface/io/fbif.h index 5a934dd7796..974a51ed916 100644 --- a/include/xen/interface/io/fbif.h +++ b/include/xen/interface/io/fbif.h @@ -49,11 +49,27 @@ struct xenfb_update { int32_t height; /* rect height */ }; +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. + */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize { + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* start offset within framebuffer */ +}; + #define XENFB_OUT_EVENT_SIZE 40 union xenfb_out_event { uint8_t type; struct xenfb_update update; + struct xenfb_resize resize; char pad[XENFB_OUT_EVENT_SIZE]; }; @@ -105,15 +121,18 @@ struct xenfb_page { * Each directory page holds PAGE_SIZE / sizeof(*pd) * framebuffer pages, and can thus map up to PAGE_SIZE * * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4, that's 4 Megs. Two directory - * pages should be enough for a while. + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 + * Megs 64 bit. 256 directories give enough room for a 512 + * Meg framebuffer with a max resolution of 12,800x10,240. + * Should be enough for a while with room leftover for + * expansion. */ - unsigned long pd[2]; + unsigned long pd[256]; }; /* - * Wart: xenkbd needs to know resolution. Put it here until a better - * solution is found, but don't leak it to the backend. + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. */ #ifdef __KERNEL__ #define XENFB_WIDTH 800 -- cgit v1.2.3-70-g09d2 From bfdab126cfa6fe3c2ddb8b6007a38202b510b6c1 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Mon, 26 May 2008 23:31:15 +0100 Subject: xen: add missing definitions in include/xen/interface/memory.h which ia64/xen needs Add xen handles realted definitions for xen memory which ia64/xen needs. Pointer argumsnts for ia64/xen hypercall are passed in pseudo physical address (guest physical address) so that it is required to convert guest kernel virtual address into pseudo physical address. The xen guest handle represents such arguments. Signed-off-by: Isaku Yamahata Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/xen/interface/memory.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index da768469aa9..af36ead1681 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -29,7 +29,7 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ unsigned long nr_extents; @@ -50,6 +50,7 @@ struct xen_memory_reservation { domid_t domid; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -85,7 +86,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - ulong extent_start; + GUEST_HANDLE(ulong) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -93,6 +94,7 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -115,6 +117,7 @@ struct xen_add_to_physmap { /* GPFN where the source mapping page should appear. */ unsigned long gpfn; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); /* * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error @@ -129,13 +132,14 @@ struct xen_translate_gpfn_list { unsigned long nr_gpfns; /* List of GPFNs to translate. */ - ulong gpfn_list; + GUEST_HANDLE(ulong) gpfn_list; /* * Output list to contain MFN translations. May be the same as the input * list (in which case each input GPFN is overwritten with the output MFN). */ - ulong mfn_list; + GUEST_HANDLE(ulong) mfn_list; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); #endif /* __XEN_PUBLIC_MEMORY_H__ */ -- cgit v1.2.3-70-g09d2 From eb1e305f4ef201e549ffd475b7dcbcd4ec36d7dc Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:23 +0100 Subject: xen: add rebind_evtchn_irq Add rebind_evtchn_irq(), which will rebind an device driver's existing irq to a new event channel on restore. Since the new event channel will be masked and bound to vcpu0, we update the state accordingly and unmask the irq once everything is set up. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/xen/events.c | 27 +++++++++++++++++++++++++++ include/xen/events.h | 1 + 2 files changed, 28 insertions(+) (limited to 'include/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4f0f22b020e..f64e9798129 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -557,6 +557,33 @@ out: put_cpu(); } +/* Rebind a new event channel to an existing irq. */ +void rebind_evtchn_irq(int evtchn, int irq) +{ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); + + spin_lock(&irq_mapping_update_lock); + + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, + so the bindcount should be non-0 */ + BUG_ON(irq_bindcount[irq] == 0); + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); + + spin_unlock(&irq_mapping_update_lock); + + /* new event channels are always bound to cpu 0 */ + irq_set_affinity(irq, cpumask_of_cpu(0)); + + /* Unmask the event channel. */ + enable_irq(irq); +} + /* Rebind an evtchn so that it gets delivered to a specific cpu */ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu) { diff --git a/include/xen/events.h b/include/xen/events.h index acd8e062c85..a82ec0c45c3 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -32,6 +32,7 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id); void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); int resend_irq_on_evtchn(unsigned int irq); +void rebind_evtchn_irq(int evtchn, int irq); static inline void notify_remote_via_evtchn(int port) { -- cgit v1.2.3-70-g09d2 From 6b9b732d0e396a3f1a95977162a8624aafce38a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:25 +0100 Subject: xen-console: add save/restore Add code to: 1. Deal with the console page being canonicalized. During save, the console's mfn in the start_info structure is canonicalized to a pfn. In order to deal with that, we always use a copy of the pfn and indirect off that all the time. However, we fall back to using the mfn if the pfn hasn't been initialized yet. 2. Restore the console event channel, and rebind it to the existing irq. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- drivers/char/hvc_xen.c | 25 +++++++++++++++++++++---- include/xen/hvc-console.h | 2 ++ 2 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/drivers/char/hvc_xen.c b/drivers/char/hvc_xen.c index d5fe0a8952f..db2ae421627 100644 --- a/drivers/char/hvc_xen.c +++ b/drivers/char/hvc_xen.c @@ -39,9 +39,14 @@ static int xencons_irq; /* ------------------------------------------------------------------ */ +static unsigned long console_pfn = ~0ul; + static inline struct xencons_interface *xencons_interface(void) { - return mfn_to_virt(xen_start_info->console.domU.mfn); + if (console_pfn == ~0ul) + return mfn_to_virt(xen_start_info->console.domU.mfn); + else + return __va(console_pfn << PAGE_SHIFT); } static inline void notify_daemon(void) @@ -101,20 +106,32 @@ static int __init xen_init(void) { struct hvc_struct *hp; - if (!is_running_on_xen()) - return 0; + if (!is_running_on_xen() || + is_initial_xendomain() || + !xen_start_info->console.domU.evtchn) + return -ENODEV; xencons_irq = bind_evtchn_to_irq(xen_start_info->console.domU.evtchn); if (xencons_irq < 0) - xencons_irq = 0 /* NO_IRQ */; + xencons_irq = 0; /* NO_IRQ */ + hp = hvc_alloc(HVC_COOKIE, xencons_irq, &hvc_ops, 256); if (IS_ERR(hp)) return PTR_ERR(hp); hvc = hp; + + console_pfn = mfn_to_pfn(xen_start_info->console.domU.mfn); + return 0; } +void xen_console_resume(void) +{ + if (xencons_irq) + rebind_evtchn_irq(xen_start_info->console.domU.evtchn, xencons_irq); +} + static void __exit xen_fini(void) { if (hvc) diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index efc3237ab99..fd5483a059b 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,6 +3,8 @@ extern struct console xenboot_console; +void xen_console_resume(void); + void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v1.2.3-70-g09d2 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), performs canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/Makefile | 2 +- arch/x86/xen/enlighten.c | 6 +-- arch/x86/xen/mmu.c | 46 +++++++++++++++++ arch/x86/xen/smp.c | 2 +- arch/x86/xen/suspend.c | 42 +++++++++++++++ arch/x86/xen/time.c | 8 +++ arch/x86/xen/xen-ops.h | 4 ++ drivers/xen/events.c | 83 +++++++++++++++++++++++++++++ drivers/xen/grant-table.c | 4 +- drivers/xen/manage.c | 126 ++++++++++++++++++++++++++++++++++++++++----- include/linux/page-flags.h | 1 + include/xen/events.h | 3 ++ include/xen/grant_table.h | 3 ++ include/xen/xen-ops.h | 9 ++++ 14 files changed, 318 insertions(+), 21 deletions(-) create mode 100644 arch/x86/xen/suspend.c (limited to 'include/xen') diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 40b119b6b10..2ba2d164913 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o \ - time.o xen-asm.o grant-table.o + time.o xen-asm.o grant-table.o suspend.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index ce67dc8f36a..b94f63ac228 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -857,7 +857,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base) PFN_DOWN(__pa(xen_start_info->pt_base))); } -static __init void setup_shared_info(void) +void xen_setup_shared_info(void) { if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); @@ -894,7 +894,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) pv_mmu_ops.release_pmd = xen_release_pmd; pv_mmu_ops.set_pte = xen_set_pte; - setup_shared_info(); + xen_setup_shared_info(); /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ @@ -902,7 +902,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) } /* This is called once we have the cpu_possible_map */ -void __init xen_setup_vcpu_info_placement(void) +void xen_setup_vcpu_info_placement(void) { int cpu; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 4740cda3656..e95955968ba 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -560,6 +560,29 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On save, we need to pin all pagetables to make sure they get their + * mfns turned into pfns. Search the list for any unpinned pgds and pin + * them (unpinned pgds are not currently in use, probably because the + * process is under construction or destruction). + */ +void xen_mm_pin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) { + xen_pgd_pin((pgd_t *)page_address(page)); + SetPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + /* The init_mm pagetable is really pinned as soon as its created, but that's before we have page structures to store the bits. So do all the book-keeping now. */ @@ -617,6 +640,29 @@ static void xen_pgd_unpin(pgd_t *pgd) xen_mc_issue(0); } +/* + * On resume, undo any pinning done at save, so that the rest of the + * kernel doesn't see any unexpected pinned pagetables. + */ +void xen_mm_unpin_all(void) +{ + unsigned long flags; + struct page *page; + + spin_lock_irqsave(&pgd_lock, flags); + + list_for_each_entry(page, &pgd_list, lru) { + if (PageSavePinned(page)) { + BUG_ON(!PagePinned(page)); + printk("unpinning pinned %p\n", page_address(page)); + xen_pgd_unpin((pgd_t *)page_address(page)); + ClearPageSavePinned(page); + } + } + + spin_unlock_irqrestore(&pgd_lock, flags); +} + void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) { spin_lock(&next->page_table_lock); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 74ab8968c52..d2e3c20127d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,7 +35,7 @@ #include "xen-ops.h" #include "mmu.h" -static cpumask_t xen_cpu_initialized_map; +cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq) = -1; static DEFINE_PER_CPU(int, callfunc_irq) = -1; static DEFINE_PER_CPU(int, debug_irq) = -1; diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c new file mode 100644 index 00000000000..7620a16fe53 --- /dev/null +++ b/arch/x86/xen/suspend.c @@ -0,0 +1,42 @@ +#include + +#include +#include +#include + +#include +#include + +#include "xen-ops.h" +#include "mmu.h" + +void xen_pre_suspend(void) +{ + xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + mfn_to_pfn(xen_start_info->console.domU.mfn); + + BUG_ON(!irqs_disabled()); + + HYPERVISOR_shared_info = &xen_dummy_shared_info; + if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_PARAVIRT_BOOTMAP), + __pte_ma(0), 0)) + BUG(); +} + +void xen_post_suspend(int suspend_cancelled) +{ + if (suspend_cancelled) { + xen_start_info->store_mfn = + pfn_to_mfn(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = + pfn_to_mfn(xen_start_info->console.domU.mfn); + } else { +#ifdef CONFIG_SMP + xen_cpu_initialized_map = cpu_online_map; +#endif + } + + xen_setup_shared_info(); +} + diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa24..0bef256e5f2 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,6 +572,14 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } +void xen_time_suspend(void) +{ +} + +void xen_time_resume(void) +{ +} + __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index a1bc89a8f16..a0503acad66 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -9,6 +9,7 @@ extern const char xen_hypervisor_callback[]; extern const char xen_failsafe_callback[]; +struct trap_info; void xen_copy_trap_info(struct trap_info *traps); DECLARE_PER_CPU(unsigned long, xen_cr3); @@ -19,6 +20,7 @@ extern struct shared_info xen_dummy_shared_info; extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); +void xen_setup_shared_info(void); char * __init xen_memory_setup(void); void __init xen_arch_setup(void); @@ -59,6 +61,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info, int wait); +extern cpumask_t xen_cpu_initialized_map; + /* Declare an asm function, along with symbols needed to make it inlineable */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 70375a69076..73d78dc9b87 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq) return ret; } +static void restore_cpu_virqs(unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int virq, irq, evtchn; + + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_VIRQ); + BUG_ON(irq_info[irq].index != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq) != 0) + BUG(); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void restore_cpu_ipis(unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int ipi, irq, evtchn; + + for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + + BUG_ON(irq_info[irq].type != IRQT_IPI); + BUG_ON(irq_info[irq].index != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi) != 0) + BUG(); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ + unmask_evtchn(evtchn); + + } +} + +void xen_irq_resume(void) +{ + unsigned int cpu, irq, evtchn; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* No IRQ <-> event-channel mappings. */ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq].evtchn = 0; /* zap event-channel binding */ + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + for_each_possible_cpu(cpu) { + restore_cpu_virqs(cpu); + restore_cpu_ipis(cpu); + } +} + static struct irq_chip xen_dynamic_chip __read_mostly = { .name = "xen-dyn", .mask = disable_dynirq, diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 52b6b41b909..e9e11168616 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return 0; } -static int gnttab_resume(void) +int gnttab_resume(void) { if (max_nr_grant_frames() < nr_grant_frames) return -ENOSYS; return gnttab_map(0, nr_grant_frames - 1); } -static int gnttab_suspend(void) +int gnttab_suspend(void) { arch_gnttab_unmap_shared(shared, nr_grant_frames); return 0; diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index aa7af9e6abc..ba85fa2cff3 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -5,21 +5,113 @@ #include #include #include +#include +#include #include - -#define SHUTDOWN_INVALID -1 -#define SHUTDOWN_POWEROFF 0 -#define SHUTDOWN_SUSPEND 2 -/* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only - * report a crash, not be instructed to crash! - * HALT is the same as POWEROFF, as far as we're concerned. The tools use - * the distinction when we return the reason code to them. - */ -#define SHUTDOWN_HALT 4 +#include +#include +#include +#include + +#include +#include + +enum shutdown_state { + SHUTDOWN_INVALID = -1, + SHUTDOWN_POWEROFF = 0, + SHUTDOWN_SUSPEND = 2, + /* Code 3 is SHUTDOWN_CRASH, which we don't use because the domain can only + report a crash, not be instructed to crash! + HALT is the same as POWEROFF, as far as we're concerned. The tools use + the distinction when we return the reason code to them. */ + SHUTDOWN_HALT = 4, +}; /* Ignore multiple shutdown requests. */ -static int shutting_down = SHUTDOWN_INVALID; +static enum shutdown_state shutting_down = SHUTDOWN_INVALID; + +static int xen_suspend(void *data) +{ + int *cancelled = data; + + BUG_ON(!irqs_disabled()); + + load_cr3(swapper_pg_dir); + + xen_mm_pin_all(); + gnttab_suspend(); + xen_time_suspend(); + xen_pre_suspend(); + + /* + * This hypercall returns 1 if suspend was cancelled + * or the domain was merely checkpointed, and 0 if it + * is resuming in a new domain. + */ + *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); + + xen_post_suspend(*cancelled); + xen_time_resume(); + gnttab_resume(); + xen_mm_unpin_all(); + + if (!*cancelled) { + xen_irq_resume(); + xen_console_resume(); + } + + return 0; +} + +static void do_suspend(void) +{ + int err; + int cancelled = 1; + + shutting_down = SHUTDOWN_SUSPEND; + +#ifdef CONFIG_PREEMPT + /* If the kernel is preemptible, we need to freeze all the processes + to prevent them from being in the middle of a pagetable update + during suspend. */ + err = freeze_processes(); + if (err) { + printk(KERN_ERR "xen suspend: freeze failed %d\n", err); + return; + } +#endif + + err = device_suspend(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + goto out; + } + + printk("suspending xenbus...\n"); + /* XXX use normal device tree? */ + xenbus_suspend(); + + err = stop_machine_run(xen_suspend, &cancelled, 0); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; + } + + if (!cancelled) + xenbus_resume(); + else + xenbus_suspend_cancel(); + + device_resume(); + + +out: +#ifdef CONFIG_PREEMPT + thaw_processes(); +#endif + shutting_down = SHUTDOWN_INVALID; +} static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) @@ -52,11 +144,17 @@ static void shutdown_handler(struct xenbus_watch *watch, } if (strcmp(str, "poweroff") == 0 || - strcmp(str, "halt") == 0) + strcmp(str, "halt") == 0) { + shutting_down = SHUTDOWN_POWEROFF; orderly_poweroff(false); - else if (strcmp(str, "reboot") == 0) + } else if (strcmp(str, "reboot") == 0) { + shutting_down = SHUTDOWN_POWEROFF; /* ? */ ctrl_alt_del(); - else { +#ifdef CONFIG_PM_SLEEP + } else if (strcmp(str, "suspend") == 0) { + do_suspend(); +#endif + } else { printk(KERN_INFO "Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415..02955a1c3d7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) diff --git a/include/xen/events.h b/include/xen/events.h index a82ec0c45c3..67c4436554a 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -41,4 +41,7 @@ static inline void notify_remote_via_evtchn(int port) } extern void notify_remote_via_irq(int irq); + +extern void xen_irq_resume(void); + #endif /* _XEN_EVENTS_H */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 46620484612..a40f1cd91be 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -51,6 +51,9 @@ struct gnttab_free_callback { u16 count; }; +int gnttab_suspend(void); +int gnttab_resume(void); + int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 10ddfe0142d..5d7a6db54a8 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,4 +5,13 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); +void xen_pre_suspend(void); +void xen_post_suspend(int suspend_cancelled); + +void xen_mm_pin_all(void); +void xen_mm_unpin_all(void); + +void xen_time_suspend(void); +void xen_time_resume(void); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3-70-g09d2 From 359cdd3f866b6219a6729e313faf2221397f3278 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:28 +0100 Subject: xen: maintain clock offset over save/restore Hook into the device model to make sure that timekeeping's resume handler is called. This deals with our clocksource's non-monotonicity over the save/restore. Explicitly call clock_has_changed() to make sure that all the timers get retriggered properly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- arch/x86/xen/time.c | 8 -------- drivers/xen/manage.c | 15 ++++++++++++--- include/xen/xen-ops.h | 3 --- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0bef256e5f2..c39e1a5aa24 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -572,14 +572,6 @@ void xen_setup_cpu_clockevents(void) clockevents_register_device(&__get_cpu_var(xen_clock_events)); } -void xen_time_suspend(void) -{ -} - -void xen_time_resume(void) -{ -} - __init void xen_time_init(void) { int cpu = smp_processor_id(); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index ba85fa2cff3..675c3dd2396 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -34,14 +34,21 @@ static enum shutdown_state shutting_down = SHUTDOWN_INVALID; static int xen_suspend(void *data) { int *cancelled = data; + int err; BUG_ON(!irqs_disabled()); load_cr3(swapper_pg_dir); + err = device_power_down(PMSG_SUSPEND); + if (err) { + printk(KERN_ERR "xen_suspend: device_power_down failed: %d\n", + err); + return err; + } + xen_mm_pin_all(); gnttab_suspend(); - xen_time_suspend(); xen_pre_suspend(); /* @@ -52,10 +59,11 @@ static int xen_suspend(void *data) *cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info)); xen_post_suspend(*cancelled); - xen_time_resume(); gnttab_resume(); xen_mm_unpin_all(); + device_power_up(); + if (!*cancelled) { xen_irq_resume(); xen_console_resume(); @@ -105,7 +113,8 @@ static void do_suspend(void) device_resume(); - + /* Make sure timer events get retriggered on all CPUs */ + clock_was_set(); out: #ifdef CONFIG_PREEMPT thaw_processes(); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 5d7a6db54a8..a706d6a7896 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -11,7 +11,4 @@ void xen_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); -void xen_time_suspend(void); -void xen_time_resume(void); - #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3-70-g09d2 From 0261ac5f2f43a1906cfacfb19d62ed643d162cbe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 29 May 2008 09:31:50 +0200 Subject: xen: fix "xen: implement save/restore" -tip testing found the following build breakage: drivers/built-in.o: In function `xen_suspend': manage.c:(.text+0x4390f): undefined reference to `xen_console_resume' with this config: http://redhat.com/~mingo/misc/config-Thu_May_29_09_23_16_CEST_2008.bad i have bisected it down to: | commit 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 | Author: Jeremy Fitzhardinge | Date: Mon May 26 23:31:27 2008 +0100 | | xen: implement save/restore the problem is that drivers/xen/manage.c is built unconditionally if CONFIG_XEN is enabled and makes use of xen_suspend(), but drivers/char/hvc_xen.c, where the xen_suspend() method is implemented, is only build if CONFIG_HVC_XEN=y as well. i have solved this by providing a NOP implementation for xen_suspend() in the !CONFIG_HVC_XEN case. Signed-off-by: Ingo Molnar --- include/xen/hvc-console.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/xen') diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index fd5483a059b..98b79bc404d 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -3,7 +3,11 @@ extern struct console xenboot_console; +#ifdef CONFIG_HVC_XEN void xen_console_resume(void); +#else +static inline void xen_console_resume(void) { } +#endif void xen_raw_console_write(const char *str); void xen_raw_printk(const char *fmt, ...); -- cgit v1.2.3-70-g09d2 From 7e0edc1bc343231029084761ebf59e522902eb49 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 31 May 2008 01:33:04 +0100 Subject: xen: add new Xen elfnote types and use them appropriately Define recently added XEN_ELFNOTEs, and use them appropriately. Most significantly, this enables domain checkpointing (xm save -c). Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/xen-head.S | 5 +++++ include/xen/interface/elfnote.h | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/xen') diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 2ab5f42f34d..ef6c9e005f9 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -7,6 +7,7 @@ #include #include #include +#include __INIT ENTRY(startup_xen) @@ -32,5 +33,9 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, + .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) #endif /*CONFIG_XEN */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index a64d3df5bd9..7a8262c375c 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -120,6 +120,26 @@ */ #define XEN_ELFNOTE_BSD_SYMTAB 11 +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* -- cgit v1.2.3-70-g09d2 From e57778a1e30470c9f5b79e370511b9af29b59c48 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 16 Jun 2008 04:30:02 -0700 Subject: xen: implement ptep_modify_prot_start/commit Xen has a pte update function which will update a pte while preserving its accessed and dirty bits. This means that ptep_modify_prot_start() can be implemented as a simple read of the pte value. The hardware may update the pte in the meantime, but ptep_modify_prot_commit() updates it while preserving any changes that may have happened in the meantime. The updates in ptep_modify_prot_commit() are batched if we're currently in lazy mmu mode. The mmu_update hypercall can take a batch of updates to perform, but this code doesn't make particular use of that feature, in favour of using generic multicall batching to get them all into the hypervisor. The net effect of this is that each mprotect pte update turns from two expensive trap-and-emulate faults into they hypervisor into a single hypercall whose cost is amortized in a batched multicall. Signed-off-by: Jeremy Fitzhardinge Acked-by: Linus Torvalds Acked-by: Hugh Dickins Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 13 ++++++++++--- arch/x86/xen/mmu.c | 21 +++++++++++++++++++++ arch/x86/xen/mmu.h | 4 ++++ include/xen/interface/features.h | 3 +++ include/xen/interface/xen.h | 9 +++++++-- 5 files changed, 45 insertions(+), 5 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0b7553cbc52..bd74229081c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -168,7 +168,9 @@ static void __init xen_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); - printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic); + printk(KERN_INFO "Hypervisor signature: %s%s\n", + xen_start_info->magic, + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } static void xen_cpuid(unsigned int *ax, unsigned int *bx, @@ -1243,6 +1245,8 @@ asmlinkage void __init xen_start_kernel(void) BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0); + xen_setup_features(); + /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; @@ -1252,14 +1256,17 @@ asmlinkage void __init xen_start_kernel(void) pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { + pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; + pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; + } + machine_ops = xen_machine_ops; #ifdef CONFIG_SMP smp_ops = xen_smp_ops; #endif - xen_setup_features(); - /* Get mfn list */ if (!xen_feature(XENFEAT_auto_translated_physmap)) xen_build_dynamic_phys_to_machine(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 8132aa8c5d4..846dad7d54a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -323,6 +323,27 @@ out: preempt_enable(); } +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + /* Just return the pte as-is. We preserve the bits on commit */ + return *ptep; +} + +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + struct multicall_space mcs; + struct mmu_update *u; + + mcs = xen_mc_entry(sizeof(*u)); + u = mcs.args; + u->ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u->val = pte_val_ma(pte); + MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); +} + /* Assume pteval_t is equivalent to all the other *val_t types. */ static pteval_t pte_mfn_to_pfn(pteval_t val) { diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index e3dd09e25c6..297bf9f5b8b 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -52,4 +52,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); +pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + #endif /* _XEN_MMU_H */ diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index d73228d1648..f51b6413b05 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -38,6 +38,9 @@ */ #define XENFEAT_pae_pgdir_above_4gb 4 +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 819a0331cda..2befa3e2f1b 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -114,9 +114,14 @@ * ptr[:2] -- Machine address within the frame whose mapping to modify. * The frame must belong to the FD, if one is specified. * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ /* * MMU EXTENDED OPERATIONS -- cgit v1.2.3-70-g09d2 From ad55db9fed6d6cd09333045945cb03ba2c070085 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Tue, 8 Jul 2008 15:06:32 -0700 Subject: xen: add xen_arch_resume()/xen_timer_resume hook for ia64 support add xen_timer_resume() hook. Timer resume should be done after event channel is resumed. add xen_arch_resume() hook when ipi becomes usable after resume. After resume, some cpu specific resource must be reinitialized on ia64 that can't be set by another cpu. However available hooks is run once on only one cpu so that ipi has to be used. During stop_machine_run() ipi can't be used because interrupt is masked. So add another hook after stop_machine_run(). Another approach might be use resume hook which is run by device_resume(). However device_resume() may be executed on suspend error recovery path. So it is necessary to determine whether it is executed on real resume path or error recovery path. Signed-off-by: Isaku Yamahata Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/suspend.c | 5 ++++- arch/x86/xen/xen-ops.h | 1 - drivers/xen/manage.c | 6 ++++-- include/xen/xen-ops.h | 3 +++ 4 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include/xen') diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 251669a932d..2a234db5949 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled) xen_cpu_initialized_map = cpu_online_map; #endif xen_vcpu_restore(); - xen_timer_resume(); } } +void xen_arch_resume(void) +{ + /* nothing */ +} diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6f4b1045c1c..77354d20425 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -37,7 +37,6 @@ void __init xen_time_init(void); unsigned long xen_get_wallclock(void); int xen_set_wallclock(unsigned long time); unsigned long long xen_sched_clock(void); -void xen_timer_resume(void); irqreturn_t xen_debug_interrupt(int irq, void *dev_id); diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 5b546e365f0..2bb268e4ac5 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -68,6 +68,7 @@ static int xen_suspend(void *data) if (!*cancelled) { xen_irq_resume(); xen_console_resume(); + xen_timer_resume(); } return 0; @@ -107,9 +108,10 @@ static void do_suspend(void) goto out; } - if (!cancelled) + if (!cancelled) { + xen_arch_resume(); xenbus_resume(); - else + } else xenbus_suspend_cancel(); device_resume(); diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index a706d6a7896..883a21bba24 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -11,4 +11,7 @@ void xen_post_suspend(int suspend_cancelled); void xen_mm_pin_all(void); void xen_mm_unpin_all(void); +void xen_timer_resume(void); +void xen_arch_resume(void); + #endif /* INCLUDE_XEN_OPS_H */ -- cgit v1.2.3-70-g09d2 From 48b5db20621388582ca11ac3c61d3403966dbe51 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 8 Jul 2008 15:06:34 -0700 Subject: xen64: define asm/xen/interface for 64-bit Copy 64-bit definitions of various interface structures into place. Signed-off-by: Jeremy Fitzhardinge Cc: Stephen Tweedie Cc: Eduardo Habkost Cc: Mark McLoughlin Signed-off-by: Ingo Molnar --- arch/x86/xen/mmu.h | 12 --- include/asm-x86/xen/interface.h | 139 ++++++++++++-------------------- include/asm-x86/xen/interface_32.h | 97 ++++++++++++++++++++++ include/asm-x86/xen/interface_64.h | 159 +++++++++++++++++++++++++++++++++++++ include/xen/interface/callback.h | 6 +- 5 files changed, 308 insertions(+), 105 deletions(-) create mode 100644 include/asm-x86/xen/interface_32.h create mode 100644 include/asm-x86/xen/interface_64.h (limited to 'include/xen') diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 297bf9f5b8b..7856e37f604 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -10,18 +10,6 @@ enum pt_level { PT_PTE }; -/* - * Page-directory addresses above 4GB do not fit into architectural %cr3. - * When accessing %cr3, or equivalent field in vcpu_guest_context, guests - * must use the following accessor macros to pack/unpack valid MFNs. - * - * Note that Xen is using the fact that the pagetable base is always - * page-aligned, and putting the 12 MSB of the address into the 12 LSB - * of cr3. - */ -#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) -#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) - void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 6227000a1e8..9d810f2538a 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h @@ -1,13 +1,13 @@ /****************************************************************************** * arch-x86_32.h * - * Guest OS interface to x86 32-bit Xen. + * Guest OS interface to x86 Xen. * * Copyright (c) 2004, K A Fraser */ -#ifndef __XEN_PUBLIC_ARCH_X86_32_H__ -#define __XEN_PUBLIC_ARCH_X86_32_H__ +#ifndef __ASM_X86_XEN_INTERFACE_H +#define __ASM_X86_XEN_INTERFACE_H #ifdef __XEN__ #define __DEFINE_GUEST_HANDLE(name, type) \ @@ -57,6 +57,17 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); #endif +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + /* * SEGMENT DESCRIPTOR TABLES */ @@ -70,59 +81,22 @@ DEFINE_GUEST_HANDLE(void); #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) -/* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ -#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ -#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ -#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ -#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ -#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ - -#define FLAT_KERNEL_CS FLAT_RING1_CS -#define FLAT_KERNEL_DS FLAT_RING1_DS -#define FLAT_KERNEL_SS FLAT_RING1_SS -#define FLAT_USER_CS FLAT_RING3_CS -#define FLAT_USER_DS FLAT_RING3_DS -#define FLAT_USER_SS FLAT_RING3_SS - -/* And the trap vector is... */ -#define TRAP_INSTR "int $0x82" - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. - */ -#ifdef CONFIG_X86_PAE -#define __HYPERVISOR_VIRT_START 0xF5800000 -#else -#define __HYPERVISOR_VIRT_START 0xFC000000 -#endif - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#endif - -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif - -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 - -#ifndef __ASSEMBLY__ - /* * Send an array of these to HYPERVISOR_set_trap_table() + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter */ #define TI_GET_DPL(_ti) ((_ti)->flags & 3) #define TI_GET_IF(_ti) ((_ti)->flags & 4) #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) +#ifndef __ASSEMBLY__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -131,32 +105,21 @@ struct trap_info { }; DEFINE_GUEST_HANDLE_STRUCT(trap_info); -struct cpu_user_regs { - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t esi; - uint32_t edi; - uint32_t ebp; - uint32_t eax; - uint16_t error_code; /* private */ - uint16_t entry_vector; /* private */ - uint32_t eip; - uint16_t cs; - uint8_t saved_upcall_mask; - uint8_t _pad0; - uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ - uint32_t esp; - uint16_t ss, _pad1; - uint16_t es, _pad2; - uint16_t ds, _pad3; - uint16_t fs, _pad4; - uint16_t gs, _pad5; +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + unsigned long pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; }; -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); +#endif /* !__ASSEMBLY__ */ -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ +#ifdef CONFIG_X86_32 +#include "interface_32.h" +#else +#include "interface_64.h" +#endif +#ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -173,33 +136,29 @@ struct vcpu_guest_context { unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ unsigned long event_callback_cs; /* CS:EIP of event callback */ unsigned long event_callback_eip; unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; + unsigned long syscall_callback_eip; +#endif unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); - -struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; - unsigned long nmi_reason; -}; - -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ -}; - -struct xen_callback { - unsigned long cs; - unsigned long eip; -}; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLY__ */ /* * Prefix forces emulation of some non-trapping instructions. @@ -213,4 +172,4 @@ struct xen_callback { #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif -#endif +#endif /* __ASM_X86_XEN_INTERFACE_H */ diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h new file mode 100644 index 00000000000..d8ac41d5db8 --- /dev/null +++ b/include/asm-x86/xen/interface_32.h @@ -0,0 +1,97 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __ASM_X86_XEN_INTERFACE_32_H +#define __ASM_X86_XEN_INTERFACE_32_H + + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* And the trap vector is... */ +#define TRAP_INSTR "int $0x82" + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define __HYPERVISOR_VIRT_START 0xF5800000 + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ +}; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#define XEN_CALLBACK(__cs, __eip) \ + ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) +#endif /* !__ASSEMBLY__ */ + + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +#endif /* __ASM_X86_XEN_INTERFACE_32_H */ diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h new file mode 100644 index 00000000000..842266ce96e --- /dev/null +++ b/include/asm-x86/xen/interface_64.h @@ -0,0 +1,159 @@ +#ifndef __ASM_X86_XEN_INTERFACE_64_H +#define __ASM_X86_XEN_INTERFACE_64_H + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +typedef unsigned long xen_callback_t; + +#define XEN_CALLBACK(__cs, __rip) \ + ((unsigned long)(__rip)) + +#endif /* !__ASSEMBLY__ */ + + +#endif /* __ASM_X86_XEN_INTERFACE_64_H */ diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h index 4aadcba31af..2ae3cd24326 100644 --- a/include/xen/interface/callback.h +++ b/include/xen/interface/callback.h @@ -82,9 +82,9 @@ */ #define CALLBACKOP_register 0 struct callback_register { - uint16_t type; - uint16_t flags; - struct xen_callback address; + uint16_t type; + uint16_t flags; + xen_callback_t address; }; /* -- cgit v1.2.3-70-g09d2 From 9c8a4420444801bd9d818f542eb4a5be8d5687f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 9 Jul 2008 13:15:03 +0200 Subject: xen64: fix !HVC_XEN build dependency fix: arch/x86/xen/built-in.o: In function `set_page_prot': enlighten.c:(.text+0x111d): undefined reference to `xen_raw_printk' arch/x86/xen/built-in.o: In function `xen_start_kernel': : undefined reference to `xen_raw_console_write' arch/x86/xen/built-in.o: In function `xen_start_kernel': : undefined reference to `xen_raw_console_write' Signed-off-by: Ingo Molnar --- include/xen/hvc-console.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/xen') diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index 98b79bc404d..c3adde32669 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -5,11 +5,12 @@ extern struct console xenboot_console; #ifdef CONFIG_HVC_XEN void xen_console_resume(void); +void xen_raw_console_write(const char *str); +void xen_raw_printk(const char *fmt, ...); #else static inline void xen_console_resume(void) { } +static inline void xen_raw_console_write(const char *str) { } +static inline void xen_raw_printk(const char *fmt, ...) { } #endif -void xen_raw_console_write(const char *str); -void xen_raw_printk(const char *fmt, ...); - #endif /* XEN_HVC_CONSOLE_H */ -- cgit v1.2.3-70-g09d2 From 2d9e1e2f58b5612aa4eab0ab54c84308a29dbd79 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 7 Jul 2008 12:07:53 -0700 Subject: xen: implement Xen-specific spinlocks The standard ticket spinlocks are very expensive in a virtual environment, because their performance depends on Xen's scheduler giving vcpus time in the order that they're supposed to take the spinlock. This implements a Xen-specific spinlock, which should be much more efficient. The fast-path is essentially the old Linux-x86 locks, using a single lock byte. The locker decrements the byte; if the result is 0, then they have the lock. If the lock is negative, then locker must spin until the lock is positive again. When there's contention, the locker spin for 2^16[*] iterations waiting to get the lock. If it fails to get the lock in that time, it adds itself to the contention count in the lock and blocks on a per-cpu event channel. When unlocking the spinlock, the locker looks to see if there's anyone blocked waiting for the lock by checking for a non-zero waiter count. If there's a waiter, it traverses the per-cpu "lock_spinners" variable, which contains which lock each CPU is waiting on. It picks one CPU waiting on the lock and sends it an event to wake it up. This allows efficient fast-path spinlock operation, while allowing spinning vcpus to give up their processor time while waiting for a contended lock. [*] 2^16 iterations is threshold at which 98% locks have been taken according to Thomas Friebel's Xen Summit talk "Preventing Guests from Spinning Around". Therefore, we'd expect the lock and unlock slow paths will only be entered 2% of the time. Signed-off-by: Jeremy Fitzhardinge Cc: Jens Axboe Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Petr Tesarik Cc: Virtualization Cc: Xen devel Cc: Thomas Friebel Cc: Nick Piggin Signed-off-by: Ingo Molnar --- arch/x86/xen/smp.c | 172 ++++++++++++++++++++++++++++++++++++++++++- drivers/xen/events.c | 27 +++++++ include/asm-x86/xen/events.h | 1 + include/xen/events.h | 7 ++ 4 files changed, 206 insertions(+), 1 deletion(-) (limited to 'include/xen') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index a8ebafc09d4..e693812ac59 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,6 +15,7 @@ * This does not handle HOTPLUG_CPU yet. */ #include +#include #include #include @@ -35,6 +36,8 @@ #include "xen-ops.h" #include "mmu.h" +static void __cpuinit xen_init_lock_cpu(int cpu); + cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); @@ -179,6 +182,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; + xen_init_lock_cpu(0); + smp_store_cpu_info(0); cpu_data(0).x86_max_cores = 1; set_cpu_sibling_map(0); @@ -301,6 +306,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); + xen_init_lock_cpu(cpu); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; @@ -413,6 +419,170 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +struct xen_spinlock { + unsigned char lock; /* 0 -> free; 1 -> locked */ + unsigned short spinners; /* count of waiting cpus */ +}; + +static int xen_spin_is_locked(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + return xl->lock != 0; +} + +static int xen_spin_is_contended(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + /* Not strictly true; this is only the count of contended + lock-takers entering the slow path. */ + return xl->spinners != 0; +} + +static int xen_spin_trylock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + u8 old = 1; + + asm("xchgb %b0,%1" + : "+q" (old), "+m" (xl->lock) : : "memory"); + + return old == 0; +} + +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); + +static inline void spinning_lock(struct xen_spinlock *xl) +{ + __get_cpu_var(lock_spinners) = xl; + wmb(); /* set lock of interest before count */ + asm(LOCK_PREFIX " incw %0" + : "+m" (xl->spinners) : : "memory"); +} + +static inline void unspinning_lock(struct xen_spinlock *xl) +{ + asm(LOCK_PREFIX " decw %0" + : "+m" (xl->spinners) : : "memory"); + wmb(); /* decrement count before clearing lock */ + __get_cpu_var(lock_spinners) = NULL; +} + +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int irq = __get_cpu_var(lock_kicker_irq); + int ret; + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return 0; + + /* announce we're spinning */ + spinning_lock(xl); + + /* clear pending */ + xen_clear_irq_pending(irq); + + /* check again make sure it didn't become free while + we weren't looking */ + ret = xen_spin_trylock(lock); + if (ret) + goto out; + + /* block until irq becomes pending */ + xen_poll_irq(irq); + kstat_this_cpu.irqs[irq]++; + +out: + unspinning_lock(xl); + return ret; +} + +static void xen_spin_lock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int timeout; + u8 oldval; + + do { + timeout = 1 << 10; + + asm("1: xchgb %1,%0\n" + " testb %1,%1\n" + " jz 3f\n" + "2: rep;nop\n" + " cmpb $0,%0\n" + " je 1b\n" + " dec %2\n" + " jnz 2b\n" + "3:\n" + : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) + : "1" (1) + : "memory"); + + } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); +} + +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) +{ + int cpu; + + for_each_online_cpu(cpu) { + /* XXX should mix up next cpu selection */ + if (per_cpu(lock_spinners, cpu) == xl) { + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break; + } + } +} + +static void xen_spin_unlock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + smp_wmb(); /* make sure no writes get moved after unlock */ + xl->lock = 0; /* release lock */ + + /* make sure unlock happens before kick */ + barrier(); + + if (unlikely(xl->spinners)) + xen_spin_unlock_slow(xl); +} + +static __cpuinit void xen_init_lock_cpu(int cpu) +{ + int irq; + const char *name; + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + xen_reschedule_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, + NULL); + + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +} + +static void __init xen_init_spinlocks(void) +{ + pv_lock_ops.spin_is_locked = xen_spin_is_locked; + pv_lock_ops.spin_is_contended = xen_spin_is_contended; + pv_lock_ops.spin_lock = xen_spin_lock; + pv_lock_ops.spin_trylock = xen_spin_trylock; + pv_lock_ops.spin_unlock = xen_spin_unlock; +} + static const struct smp_ops xen_smp_ops __initdata = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, @@ -430,5 +600,5 @@ void __init xen_smp_init(void) { smp_ops = xen_smp_ops; xen_fill_possible_map(); - paravirt_use_bytelocks(); + xen_init_spinlocks(); } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 332dd63750a..0e0c28574af 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -734,6 +734,33 @@ static void restore_cpu_ipis(unsigned int cpu) } } +/* Clear an irq's pending state, in preparation for polling on it */ +void xen_clear_irq_pending(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +/* Poll waiting for an irq to become pending. In the usual case, the + irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq) +{ + evtchn_port_t evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + struct sched_poll poll; + + poll.nr_ports = 1; + poll.timeout = 0; + poll.ports = &evtchn; + + if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) + BUG(); + } +} + void xen_irq_resume(void) { unsigned int cpu, irq, evtchn; diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h index f8d57ea1f05..8ded7472002 100644 --- a/include/asm-x86/xen/events.h +++ b/include/asm-x86/xen/events.h @@ -5,6 +5,7 @@ enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, XEN_CALL_FUNCTION_SINGLE_VECTOR, + XEN_SPIN_UNLOCK_VECTOR, XEN_NR_IPIS, }; diff --git a/include/xen/events.h b/include/xen/events.h index 67c4436554a..4680ff3fbc9 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -44,4 +44,11 @@ extern void notify_remote_via_irq(int irq); extern void xen_irq_resume(void); +/* Clear an irq's pending state, in preparation for polling on it */ +void xen_clear_irq_pending(int irq); + +/* Poll waiting for an irq to become pending. In the usual case, the + irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq); + #endif /* _XEN_EVENTS_H */ -- cgit v1.2.3-70-g09d2