diff options
Diffstat (limited to 'include/xen')
39 files changed, 2221 insertions, 463 deletions
diff --git a/include/xen/Kbuild b/include/xen/Kbuild deleted file mode 100644 index 84ad8f02fee..00000000000 --- a/include/xen/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -header-y += evtchn.h -header-y += privcmd.h diff --git a/include/xen/acpi.h b/include/xen/acpi.h new file mode 100644 index 00000000000..4ddd7dc4a61 --- /dev/null +++ b/include/xen/acpi.h @@ -0,0 +1,111 @@ +/****************************************************************************** + * acpi.h + * acpi file for domain 0 kernel + * + * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> + * Copyright (c) 2011 Yu Ke <ke.yu@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _XEN_ACPI_H +#define _XEN_ACPI_H + +#include <linux/types.h> + +#ifdef CONFIG_XEN_DOM0 +#include <asm/xen/hypervisor.h> +#include <xen/xen.h> +#include <linux/acpi.h> + +#define ACPI_MEMORY_DEVICE_CLASS        "memory" +#define ACPI_MEMORY_DEVICE_HID          "PNP0C80" +#define ACPI_MEMORY_DEVICE_NAME         "Hotplug Mem Device" + +int xen_stub_memory_device_init(void); +void xen_stub_memory_device_exit(void); + +#define ACPI_PROCESSOR_CLASS            "processor" +#define ACPI_PROCESSOR_DEVICE_HID       "ACPI0007" +#define ACPI_PROCESSOR_DEVICE_NAME      "Processor" + +int xen_stub_processor_init(void); +void xen_stub_processor_exit(void); + +void xen_pcpu_hotplug_sync(void); +int xen_pcpu_id(uint32_t acpi_id); + +static inline int xen_acpi_get_pxm(acpi_handle h) +{ +	unsigned long long pxm; +	acpi_status status; +	acpi_handle handle; +	acpi_handle phandle = h; + +	do { +		handle = phandle; +		status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); +		if (ACPI_SUCCESS(status)) +			return pxm; +		status = acpi_get_parent(handle, &phandle); +	} while (ACPI_SUCCESS(status)); + +	return -ENXIO; +} + +int xen_acpi_notify_hypervisor_sleep(u8 sleep_state, +				     u32 pm1a_cnt, u32 pm1b_cnd); +int xen_acpi_notify_hypervisor_extended_sleep(u8 sleep_state, +				     u32 val_a, u32 val_b); + +static inline int xen_acpi_suspend_lowlevel(void) +{ +	/* +	* Xen will save and restore CPU context, so +	* we can skip that and just go straight to +	* the suspend. 
+	*/ +	acpi_enter_sleep_state(ACPI_STATE_S3); +	return 0; +} + +static inline void xen_acpi_sleep_register(void) +{ +	if (xen_initial_domain()) { +		acpi_os_set_prepare_sleep( +			&xen_acpi_notify_hypervisor_sleep); +		acpi_os_set_prepare_extended_sleep( +			&xen_acpi_notify_hypervisor_extended_sleep); + +		acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel; +	} +} +#else +static inline void xen_acpi_sleep_register(void) +{ +} +#endif + +#endif	/* _XEN_ACPI_H */ diff --git a/include/xen/balloon.h b/include/xen/balloon.h new file mode 100644 index 00000000000..a4c1c6a9369 --- /dev/null +++ b/include/xen/balloon.h @@ -0,0 +1,43 @@ +/****************************************************************************** + * Xen balloon functionality + */ + +#define RETRY_UNLIMITED	0 + +struct balloon_stats { +	/* We aim for 'current allocation' == 'target allocation'. */ +	unsigned long current_pages; +	unsigned long target_pages; +	/* Number of pages in high- and low-memory balloons. */ +	unsigned long balloon_low; +	unsigned long balloon_high; +	unsigned long schedule_delay; +	unsigned long max_schedule_delay; +	unsigned long retry_count; +	unsigned long max_retry_count; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +	unsigned long hotplug_pages; +	unsigned long balloon_hotplug; +#endif +}; + +extern struct balloon_stats balloon_stats; + +void balloon_set_new_target(unsigned long target); + +int alloc_xenballooned_pages(int nr_pages, struct page **pages, +		bool highmem); +void free_xenballooned_pages(int nr_pages, struct page **pages); + +struct page *get_balloon_scratch_page(void); +void put_balloon_scratch_page(void); + +struct device; +#ifdef CONFIG_XEN_SELFBALLOONING +extern int register_xen_selfballooning(struct device *dev); +#else +static inline int register_xen_selfballooning(struct device *dev) +{ +	return -ENOSYS; +} +#endif diff --git a/include/xen/events.h b/include/xen/events.h index 646dd17d3aa..8bee7a75e85 100644 --- a/include/xen/events.h +++ 
b/include/xen/events.h @@ -2,11 +2,16 @@  #define _XEN_EVENTS_H  #include <linux/interrupt.h> +#ifdef CONFIG_PCI_MSI +#include <linux/msi.h> +#endif  #include <xen/interface/event_channel.h>  #include <asm/xen/hypercall.h>  #include <asm/xen/events.h> +unsigned xen_evtchn_nr_channels(void); +  int bind_evtchn_to_irq(unsigned int evtchn);  int bind_evtchn_to_irqhandler(unsigned int evtchn,  			      irq_handler_t handler, @@ -23,6 +28,12 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,  			   unsigned long irqflags,  			   const char *devname,  			   void *dev_id); +int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, +					  unsigned int remote_port, +					  irq_handler_t handler, +					  unsigned long irqflags, +					  const char *devname, +					  void *dev_id);  /*   * Common unbind function for all event sources. Takes IRQ to unbind from. @@ -31,8 +42,19 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,   */  void unbind_from_irqhandler(unsigned int irq, void *dev_id); +#define XEN_IRQ_PRIORITY_MAX     EVTCHN_FIFO_PRIORITY_MAX +#define XEN_IRQ_PRIORITY_DEFAULT EVTCHN_FIFO_PRIORITY_DEFAULT +#define XEN_IRQ_PRIORITY_MIN     EVTCHN_FIFO_PRIORITY_MIN +int xen_set_irq_priority(unsigned irq, unsigned priority); + +/* + * Allow extra references to event channels exposed to userspace by evtchn + */ +int evtchn_make_refcounted(unsigned int evtchn); +int evtchn_get(unsigned int evtchn); +void evtchn_put(unsigned int evtchn); +  void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector); -int resend_irq_on_evtchn(unsigned int irq);  void rebind_evtchn_irq(int evtchn, int irq);  static inline void notify_remote_via_evtchn(int port) @@ -41,9 +63,9 @@ static inline void notify_remote_via_evtchn(int port)  	(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);  } -extern void notify_remote_via_irq(int irq); +void notify_remote_via_irq(int irq); -extern void xen_irq_resume(void); +void xen_irq_resume(void);  /* Clear an irq's pending state, in 
preparation for polling on it */  void xen_clear_irq_pending(int irq); @@ -60,33 +82,46 @@ void xen_poll_irq_timeout(int irq, u64 timeout);  /* Determine the IRQ which is bound to an event channel */  unsigned irq_from_evtchn(unsigned int evtchn); +int irq_from_virq(unsigned int cpu, unsigned int virq); +unsigned int evtchn_from_irq(unsigned irq);  /* Xen HVM evtchn vector callback */ -extern void xen_hvm_callback_vector(void); +void xen_hvm_callback_vector(void); +#ifdef CONFIG_TRACING +#define trace_xen_hvm_callback_vector xen_hvm_callback_vector +#endif  extern int xen_have_vector_callback;  int xen_set_callback_via(uint64_t via);  void xen_evtchn_do_upcall(struct pt_regs *regs);  void xen_hvm_evtchn_do_upcall(void); -/* Allocate an irq for a physical interrupt, given a gsi.  "Legacy" - * GSIs are identity mapped; others are dynamically allocated as - * usual. */ -int xen_allocate_pirq(unsigned gsi, int shareable, char *name); -int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name); +/* Bind a pirq for a physical interrupt to an irq. */ +int xen_bind_pirq_gsi_to_irq(unsigned gsi, +			     unsigned pirq, int shareable, char *name);  #ifdef CONFIG_PCI_MSI -/* Allocate an irq and a pirq to be used with MSIs. */ -void xen_allocate_pirq_msi(char *name, int *irq, int *pirq); -int xen_create_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int type); +/* Allocate a pirq for a MSI style physical interrupt. */ +int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc); +/* Bind an MSI pirq to an irq. */ +int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, +			     int pirq, int nvec, const char *name, domid_t domid);  #endif  /* De-allocates the above mentioned physical interrupt. */  int xen_destroy_irq(int irq); -/* Return vector allocated to pirq */ -int xen_vector_from_irq(unsigned pirq); +/* Return irq from pirq */ +int xen_irq_from_pirq(unsigned pirq); + +/* Return the pirq allocated to the irq. 
*/ +int xen_pirq_from_irq(unsigned irq); + +/* Return the irq allocated to the gsi */ +int xen_irq_from_gsi(unsigned gsi); -/* Return gsi allocated to pirq */ -int xen_gsi_from_irq(unsigned pirq); +/* Determine whether to ignore this IRQ if it is passed to a guest. */ +int xen_test_irq_shared(int irq); +/* initialize Xen IRQ subsystem */ +void xen_init_IRQ(void);  #endif	/* _XEN_EVENTS_H */ diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h deleted file mode 100644 index 14e833ee4e0..00000000000 --- a/include/xen/evtchn.h +++ /dev/null @@ -1,88 +0,0 @@ -/****************************************************************************** - * evtchn.h - * - * Interface to /dev/xen/evtchn. - * - * Copyright (c) 2003-2005, K A Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __LINUX_PUBLIC_EVTCHN_H__ -#define __LINUX_PUBLIC_EVTCHN_H__ - -/* - * Bind a fresh port to VIRQ @virq. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_VIRQ				\ -	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) -struct ioctl_evtchn_bind_virq { -	unsigned int virq; -}; - -/* - * Bind a fresh port to remote <@remote_domain, @remote_port>. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\ -	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) -struct ioctl_evtchn_bind_interdomain { -	unsigned int remote_domain, remote_port; -}; - -/* - * Allocate a fresh port for binding to @remote_domain. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\ -	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) -struct ioctl_evtchn_bind_unbound_port { -	unsigned int remote_domain; -}; - -/* - * Unbind previously allocated @port. - */ -#define IOCTL_EVTCHN_UNBIND				\ -	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) -struct ioctl_evtchn_unbind { -	unsigned int port; -}; - -/* - * Unbind previously allocated @port. - */ -#define IOCTL_EVTCHN_NOTIFY				\ -	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) -struct ioctl_evtchn_notify { -	unsigned int port; -}; - -/* Clear and reinitialise the event buffer. Clear error condition. 
*/ -#define IOCTL_EVTCHN_RESET				\ -	_IOC(_IOC_NONE, 'E', 5, 0) - -#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 9a731706a01..5c1aba154b6 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -37,9 +37,16 @@  #ifndef __ASM_GNTTAB_H__  #define __ASM_GNTTAB_H__ -#include <asm/xen/hypervisor.h> +#include <asm/page.h> + +#include <xen/interface/xen.h>  #include <xen/interface/grant_table.h> -#include <asm/xen/grant_table.h> + +#include <asm/xen/hypervisor.h> + +#include <xen/features.h> + +#define GNTTAB_RESERVED_XENSTORE 1  /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */  #define NR_GRANT_FRAMES 4 @@ -57,6 +64,24 @@ int gnttab_resume(void);  int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,  				int readonly); +int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, +					int flags, unsigned page_off, +					unsigned length); +int gnttab_grant_foreign_access_trans(domid_t domid, int flags, +				      domid_t trans_domid, +				      grant_ref_t trans_gref); + +/* + * Are sub-page grants available on this version of Xen?  Returns true if they + * are, and false if they're not. + */ +bool gnttab_subpage_grants_available(void); + +/* + * Are transitive grants available on this version of Xen?  Returns true if they + * are, and false if they're not. 
+ */ +bool gnttab_trans_grants_available(void);  /*   * End access through the given grant reference, iff the grant entry is no @@ -103,19 +128,86 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);  void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,  				     unsigned long frame, int readonly); +int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid, +					    unsigned long frame, int flags, +					    unsigned page_off, +					    unsigned length); +int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid, +					  int flags, domid_t trans_domid, +					  grant_ref_t trans_gref);  void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,  				       unsigned long pfn); -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, +static inline void +gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, +		  uint32_t flags, grant_ref_t ref, domid_t domid) +{ +	if (flags & GNTMAP_contains_pte) +		map->host_addr = addr; +	else if (xen_feature(XENFEAT_auto_translated_physmap)) +		map->host_addr = __pa(addr); +	else +		map->host_addr = addr; + +	map->flags = flags; +	map->ref = ref; +	map->dom = domid; +} + +static inline void +gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, +		    uint32_t flags, grant_handle_t handle) +{ +	if (flags & GNTMAP_contains_pte) +		unmap->host_addr = addr; +	else if (xen_feature(XENFEAT_auto_translated_physmap)) +		unmap->host_addr = __pa(addr); +	else +		unmap->host_addr = addr; + +	unmap->handle = handle; +	unmap->dev_bus_addr = 0; +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status); +int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes, +			   unsigned long max_nr_gframes, +			   void **__shared); +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes,  			   unsigned long max_nr_gframes, -			   struct grant_entry **__shared); -void 
arch_gnttab_unmap_shared(struct grant_entry *shared, -			      unsigned long nr_gframes); +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes); -extern unsigned long xen_hvm_resume_frames; +struct grant_frames { +	xen_pfn_t *pfn; +	unsigned int count; +	void *vaddr; +}; +extern struct grant_frames xen_auto_xlat_grant_frames;  unsigned int gnttab_max_grant_frames(void); +int gnttab_setup_auto_xlat_frames(phys_addr_t addr); +void gnttab_free_auto_xlat_frames(void);  #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) +int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, +		    struct gnttab_map_grant_ref *kmap_ops, +		    struct page **pages, unsigned int count); +int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, +		      struct gnttab_map_grant_ref *kunmap_ops, +		      struct page **pages, unsigned int count); + +/* Perform a batch of grant map/copy operations. Retry every batch slot + * for which the hypervisor returns GNTST_eagain. This is typically due + * to paged out target frames. + * + * Will retry for 1, 2, ... 255 ms, i.e. 256 times during 32 seconds. + * + * Return value in each and every status field of the batch guaranteed + * to not be GNTST_eagain. 
+ */ +void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count); +void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count); +  #endif /* __ASM_GNTTAB_H__ */ diff --git a/include/xen/hvc-console.h b/include/xen/hvc-console.h index c3adde32669..b62dfef15f6 100644 --- a/include/xen/hvc-console.h +++ b/include/xen/hvc-console.h @@ -6,11 +6,13 @@ extern struct console xenboot_console;  #ifdef CONFIG_HVC_XEN  void xen_console_resume(void);  void xen_raw_console_write(const char *str); +__printf(1, 2)  void xen_raw_printk(const char *fmt, ...);  #else  static inline void xen_console_resume(void) { }  static inline void xen_raw_console_write(const char *str) { } -static inline void xen_raw_printk(const char *fmt, ...) { } +static inline __printf(1, 2) +void xen_raw_printk(const char *fmt, ...) { }  #endif  #endif	/* XEN_HVC_CONSOLE_H */ diff --git a/include/xen/hvm.h b/include/xen/hvm.h index b193fa2f9fd..63917a8de3b 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -5,6 +5,36 @@  #include <xen/interface/hvm/params.h>  #include <asm/xen/hypercall.h> +static const char *param_name(int op) +{ +#define PARAM(x) [HVM_PARAM_##x] = #x +	static const char *const names[] = { +		PARAM(CALLBACK_IRQ), +		PARAM(STORE_PFN), +		PARAM(STORE_EVTCHN), +		PARAM(PAE_ENABLED), +		PARAM(IOREQ_PFN), +		PARAM(BUFIOREQ_PFN), +		PARAM(TIMER_MODE), +		PARAM(HPET_ENABLED), +		PARAM(IDENT_PT), +		PARAM(DM_DOMAIN), +		PARAM(ACPI_S_STATE), +		PARAM(VM86_TSS), +		PARAM(VPT_ALIGN), +		PARAM(CONSOLE_PFN), +		PARAM(CONSOLE_EVTCHN), +	}; +#undef PARAM + +	if (op >= ARRAY_SIZE(names)) +		return "unknown"; + +	if (!names[op]) +		return "reserved"; + +	return names[op]; +}  static inline int hvm_get_parameter(int idx, uint64_t *value)  {  	struct xen_hvm_param xhv; @@ -14,8 +44,8 @@ static inline int hvm_get_parameter(int idx, uint64_t *value)  	xhv.index = idx;  	r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);  	if (r < 0) { -		printk(KERN_ERR "Cannot get hvm parameter %d: 
%d!\n", -			idx, r); +		pr_err("Cannot get hvm parameter %s (%d): %d!\n", +		       param_name(idx), idx, r);  		return r;  	}  	*value = xhv.value; diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h index 2ae3cd24326..dc3193f4b58 100644 --- a/include/xen/interface/callback.h +++ b/include/xen/interface/callback.h @@ -27,7 +27,7 @@  #ifndef __XEN_PUBLIC_CALLBACK_H__  #define __XEN_PUBLIC_CALLBACK_H__ -#include "xen.h" +#include <xen/interface/xen.h>  /*   * Prototype for this hypercall is: @@ -36,7 +36,7 @@   * @extra_args == Operation-specific extra arguments (NULL if none).   */ -/* ia64, x86: Callback for event delivery. */ +/* x86: Callback for event delivery. */  #define CALLBACKTYPE_event                 0  /* x86: Failsafe callback when guest state cannot be restored by Xen. */ diff --git a/include/xen/interface/elfnote.h b/include/xen/interface/elfnote.h index 7a8262c375c..6f4eae328ca 100644 --- a/include/xen/interface/elfnote.h +++ b/include/xen/interface/elfnote.h @@ -51,7 +51,7 @@  /*   * The offset of the ELF paddr field from the acutal required - * psuedo-physical address (numeric). + * pseudo-physical address (numeric).   *   * This is used to maintain backwards compatibility with older kernels   * which wrote __PAGE_OFFSET into that field. This field defaults to 0 @@ -140,6 +140,19 @@   */  #define XEN_ELFNOTE_SUSPEND_CANCEL 14 +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about. The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. 
+ * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 +  #endif /* __XEN_PUBLIC_ELFNOTE_H__ */  /* diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index 2090881c365..7e6acef5415 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -177,6 +177,52 @@ struct evtchn_unmask {  	evtchn_port_t port;  }; +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + *  1. <dom> may be specified as DOMID_SELF. + *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset		 10 +struct evtchn_reset { +	/* IN parameters. */ +	domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + */ +#define EVTCHNOP_init_control    11 +struct evtchn_init_control { +	/* IN parameters. */ +	uint64_t control_gfn; +	uint32_t offset; +	uint32_t vcpu; +	/* OUT parameters. */ +	uint8_t link_bits; +	uint8_t _pad[7]; +}; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +#define EVTCHNOP_expand_array    12 +struct evtchn_expand_array { +	/* IN parameters. */ +	uint64_t array_gfn; +}; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +#define EVTCHNOP_set_priority    13 +struct evtchn_set_priority { +	/* IN parameters. */ +	uint32_t port; +	uint32_t priority; +}; +  struct evtchn_op {  	uint32_t cmd; /* EVTCHNOP_* */  	union { @@ -194,4 +240,39 @@ struct evtchn_op {  };  DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). 
*/ +#define EVTCHN_FIFO_PRIORITY_MAX     0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN     15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef uint32_t event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED  30 +#define EVTCHN_FIFO_LINKED  29 +#define EVTCHN_FIFO_BUSY    28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { +	uint32_t     ready; +	uint32_t     _rsvd; +	event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; +  #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff --git a/include/xen/interface/features.h b/include/xen/interface/features.h index b6ca39a069d..131a6ccdba2 100644 --- a/include/xen/interface/features.h +++ b/include/xen/interface/features.h @@ -50,6 +50,9 @@  /* x86: pirq can be used by HVM guests */  #define XENFEAT_hvm_pirqs           10 +/* operation as Dom0 is supported */ +#define XENFEAT_dom0                      11 +  #define XENFEAT_NR_SUBMAPS 1  #endif /* __XEN_PUBLIC_FEATURES_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index 39e571796e3..e40fae9bf11 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -85,12 +85,22 @@   */  /* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/*   * A grant table comprises a packed array of grant entries in one or more   * page frames shared between Xen and a guest.   * [XEN]: This field is written by Xen and read by the sharing guest.   * [GST]: This field is written by the guest and read by Xen.   */ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility.  New guests should use version 2. 
+ */ +struct grant_entry_v1 {      /* GTF_xxx: various type and flag information.  [XEN,GST] */      uint16_t flags;      /* The domain being granted foreign privileges. [GST] */ @@ -108,10 +118,13 @@ struct grant_entry {   *  GTF_permit_access: Allow @domid to map/access @frame.   *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame   *                       to this guest. Xen writes the page number to @frame. + *  GTF_transitive: Allow @domid to transitively access a subrange of + *                  @trans_grant in @trans_domid.  No mappings are allowed.   */  #define GTF_invalid         (0U<<0)  #define GTF_permit_access   (1U<<0)  #define GTF_accept_transfer (2U<<0) +#define GTF_transitive      (3U<<0)  #define GTF_type_mask       (3U<<0)  /* @@ -119,6 +132,9 @@ struct grant_entry {   *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]   *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]   *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + *  GTF_sub_page: Grant access to only a subrange of the page.  @domid + *                will only be allowed to copy from the grant, and not + *                map it. [GST]   */  #define _GTF_readonly       (2)  #define GTF_readonly        (1U<<_GTF_readonly) @@ -126,6 +142,8 @@ struct grant_entry {  #define GTF_reading         (1U<<_GTF_reading)  #define _GTF_writing        (4)  #define GTF_writing         (1U<<_GTF_writing) +#define _GTF_sub_page       (8) +#define GTF_sub_page        (1U<<_GTF_sub_page)  /*   * Subflags for GTF_accept_transfer: @@ -142,15 +160,81 @@ struct grant_entry {  #define _GTF_transfer_completed (3)  #define GTF_transfer_completed  (1U<<_GTF_transfer_completed) +/* + * Version 2 grant table entries.  These fulfil the same role as + * version 1 entries, but can represent more complicated operations. 
+ * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ -/*********************************** - * GRANT TABLE QUERIES AND USES +/* + * Version 1 and version 2 grant entries share a common prefix.  The + * fields of the prefix are documented as part of struct + * grant_entry_v1.   */ +struct grant_entry_header { +    uint16_t flags; +    domid_t  domid; +};  /* - * Reference to a grant entry in a specified domain's grant table. + * Version 2 of the grant entry structure, here is a union because three + * different types are supported: full_page, sub_page and transitive. + */ +union grant_entry_v2 { +    struct grant_entry_header hdr; + +    /* +     * This member is used for V1-style full page grants, where either: +     * +     * -- hdr.type is GTF_accept_transfer, or +     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. +     * +     * In that case, the frame field has the same semantics as the +     * field of the same name in the V1 entry structure. +     */ +    struct { +	struct grant_entry_header hdr; +	uint32_t pad0; +	uint64_t frame; +    } full_page; + +    /* +     * If the grant type is GTF_permit_access and GTF_sub_page is set, +     * @domid is allowed to access bytes [@page_off,@page_off+@length) +     * in frame @frame. +     */ +    struct { +	struct grant_entry_header hdr; +	uint16_t page_off; +	uint16_t length; +	uint64_t frame; +    } sub_page; + +    /* +     * If the grant is GTF_transitive, @domid is allowed to use the +     * grant @gref in domain @trans_domid, as if it was the local +     * domain.  Obviously, the transitive access must be compatible +     * with the original grant. 
+     */ +    struct { +	struct grant_entry_header hdr; +	domid_t trans_domid; +	uint16_t pad0; +	grant_ref_t gref; +    } transitive; + +    uint32_t __spacer[4]; /* Pad to a power of two */ +}; + +typedef uint16_t grant_status_t; + +/*********************************** + * GRANT TABLE QUERIES AND USES   */ -typedef uint32_t grant_ref_t;  /*   * Handle to track a mapping created via a grant reference. @@ -226,7 +310,7 @@ struct gnttab_setup_table {      uint32_t nr_frames;      /* OUT parameters. */      int16_t  status;              /* GNTST_* */ -    GUEST_HANDLE(ulong) frame_list; +    GUEST_HANDLE(xen_pfn_t) frame_list;  };  DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); @@ -254,7 +338,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);  #define GNTTABOP_transfer                4  struct gnttab_transfer {      /* IN parameters. */ -    unsigned long mfn; +    xen_pfn_t mfn;      domid_t       domid;      grant_ref_t   ref;      /* OUT parameters. */ @@ -291,7 +375,7 @@ struct gnttab_copy {  	struct {  		union {  			grant_ref_t ref; -			unsigned long   gmfn; +			xen_pfn_t   gmfn;  		} u;  		domid_t  domid;  		uint16_t offset; @@ -322,6 +406,79 @@ struct gnttab_query_size {  DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);  /* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by <handle> but atomically replace the page table entry with one + * pointing to the machine address under <new_addr>.  <new_addr> will be + * redirected to the null entry. + * NOTES: + *  1. The call may fail in an undefined manner if either mapping is not + *     tracked by <handle>. + *  2. After executing a batch of unmaps, it is guaranteed that no stale + *     mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace    7 +struct gnttab_unmap_and_replace { +    /* IN parameters. */ +    uint64_t host_addr; +    uint64_t new_addr; +    grant_handle_t handle; +    /* OUT parameters. 
*/ +    int16_t  status;              /* GNTST_* */ +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); + +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure.  This operation can only be performed + * once in any given domain.  It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version          8 +struct gnttab_set_version { +    /* IN parameters */ +    uint32_t version; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for <dom>. In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * <nr_frames> specify the size of vector <frame_list>. + * The frame addresses are returned in the <frame_list>. + * Only <nr_frames> addresses are returned, even if the table is larger. + * NOTES: + *  1. <dom> may be specified as DOMID_SELF. + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames     9 +struct gnttab_get_status_frames { +    /* IN parameters. */ +    uint32_t nr_frames; +    domid_t  dom; +    /* OUT parameters. */ +    int16_t  status;              /* GNTST_* */ +    GUEST_HANDLE(uint64_t) frame_list; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain <dom>. + */ +#define GNTTABOP_get_version          10 +struct gnttab_get_version { +    /* IN parameters */ +    domid_t dom; +    uint16_t pad; +    /* OUT parameters */ +    uint32_t version; +}; +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); + +/*   * Bitfield values for update_pin_status.flags.   
*/   /* Map the grant entry for access by I/O devices. */ @@ -362,7 +519,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);  #define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */  #define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */  #define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */ -#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary */ +#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */ +#define GNTST_address_too_big (-11) /* transfer page address too large.      */ +#define GNTST_eagain          (-12) /* Operation not done; try again.        */  #define GNTTABOP_error_msgs {                   \      "okay",                                     \ @@ -375,7 +534,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);      "no spare translation slot in the I/O MMU", \      "permission denied",                        \      "bad page",                                 \ -    "copy arguments cross page boundary"        \ +    "copy arguments cross page boundary",       \ +    "page address size too large",              \ +    "operation not done; try again"             \  }  #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h index a4827f46ee9..956a0468286 100644 --- a/include/xen/interface/hvm/hvm_op.h +++ b/include/xen/interface/hvm/hvm_op.h @@ -43,4 +43,23 @@ struct xen_hvm_pagetable_dying {  typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;  DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); +enum hvmmem_type_t { +    HVMMEM_ram_rw,             /* Normal read/write guest RAM */ +    HVMMEM_ram_ro,             /* Read-only; writes are discarded */ +    HVMMEM_mmio_dm,            /* Reads and write go to the device model */ +}; + +#define HVMOP_get_mem_type    15 +/* Return hvmmem_type_t for the specified pfn. 
*/ +struct xen_hvm_get_mem_type { +    /* Domain to be queried. */ +    domid_t domid; +    /* OUT variable. */ +    uint16_t mem_type; +    uint16_t pad[2]; /* align next field on 8-byte boundary */ +    /* IN variable. */ +    uint64_t pfn; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); +  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h index 1888d8c157e..a6c79911e72 100644 --- a/include/xen/interface/hvm/params.h +++ b/include/xen/interface/hvm/params.h @@ -21,7 +21,7 @@  #ifndef __XEN_PUBLIC_HVM_PARAMS_H__  #define __XEN_PUBLIC_HVM_PARAMS_H__ -#include "hvm_op.h" +#include <xen/interface/hvm/hvm_op.h>  /*   * Parameter space for HVMOP_{set,get}_param. @@ -90,6 +90,10 @@  /* Boolean: Enable aligning all periodic vpts to reduce interrupts */  #define HVM_PARAM_VPT_ALIGN    16 -#define HVM_NR_PARAMS          17 +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN    17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +#define HVM_NR_PARAMS          19  #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index c2d1fa4dc1e..c33e1c489eb 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h @@ -9,8 +9,8 @@  #ifndef __XEN_PUBLIC_IO_BLKIF_H__  #define __XEN_PUBLIC_IO_BLKIF_H__ -#include "ring.h" -#include "../grant_table.h" +#include <xen/interface/io/ring.h> +#include <xen/interface/grant_table.h>  /*   * Front->back notifications: When enqueuing a new request, sending a @@ -45,26 +45,164 @@ typedef uint64_t blkif_sector_t;  #define BLKIF_OP_WRITE_BARRIER     2  /* + * Recognised if "feature-flush-cache" is present in backend xenbus + * info.  A flush will ask the underlying storage hardware to flush its + * non-volatile caches as appropriate.  
The "feature-flush-cache" node + * contains a boolean indicating whether flush requests are likely to + * succeed or fail. Either way, a flush request may fail at any time + * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying + * block-device hardware. The boolean simply indicates whether or not it + * is worthwhile for the frontend to attempt flushes.  If a backend does + * not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not* create the + * "feature-flush-cache" node! + */ +#define BLKIF_OP_FLUSH_DISKCACHE   3 + +/* + * Recognised only if "feature-discard" is present in backend xenbus info. + * The "feature-discard" node contains a boolean indicating whether trim + * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely + * to succeed or fail. Either way, a discard request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt discard requests. + * If a backend does not recognise BLKIF_OP_DISCARD, it should *not* + * create the "feature-discard" node! + * + * Discard operation is a request for the underlying block device to mark + * extents to be erased. However, discard does not guarantee that the blocks + * will be erased from the device - it is just a hint to the device + * controller that these blocks are no longer in use. What the device + * controller does with that information is left to the controller. + * Discard operations are passed with sector_number as the + * sector index to begin discard operations at and nr_sectors as the number of + * sectors to be discarded. The specified sectors should be discarded if the + * underlying block device supports trim (ATA) or unmap (SCSI) operations, + * or a BLKIF_RSP_EOPNOTSUPP  should be returned.
+ * More information about trim/unmap operations at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + * http://www.seagate.com/staticfiles/support/disc/manuals/ + *     Interface%20manuals/100293068c.pdf + * The backend can optionally provide three extra XenBus attributes to + * further optimize the discard functionality: + * 'discard-alignment' - Devices that support discard functionality may + * internally allocate space in units that are bigger than the exported + * logical block size. The discard-alignment parameter indicates how many bytes + * the beginning of the partition is offset from the internal allocation unit's + * natural alignment. + * 'discard-granularity'  - Devices that support discard functionality may + * internally allocate space using units that are bigger than the logical block + * size. The discard-granularity parameter indicates the size of the internal + * allocation unit in bytes if reported by the device. Otherwise the + * discard-granularity will be set to match the device's physical block size. + * 'discard-secure' - All copies of the discarded sectors (potentially created + * by garbage collection) must also be erased.  To use this feature, the flag + * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard. + */ +#define BLKIF_OP_DISCARD           5 + +/* + * Recognized if "feature-max-indirect-segments" is present in the backend + * xenbus info. The "feature-max-indirect-segments" node contains the maximum + * number of segments allowed by the backend per request. If the node is + * present, the frontend might use blkif_request_indirect structs in order to + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The + * maximum number of indirect segments is fixed by the backend, but the + * frontend can issue requests with any number of indirect segments as long as + * it's less than the number provided by the backend.
The indirect_grefs field + * in blkif_request_indirect should be filled by the frontend with the + * grant references of the pages that are holding the indirect segments. + * These pages are filled with an array of blkif_request_segment that hold the + * information about the segments. The number of indirect pages to use is + * determined by the number of segments an indirect request contains. Every + * indirect page can contain a maximum of + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to + * calculate the number of indirect pages to use we have to do + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). + * + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* + * create the "feature-max-indirect-segments" node! + */ +#define BLKIF_OP_INDIRECT          6 + +/*   * Maximum scatter/gather segments per request.   * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.   * NB. This could be 12 if the ring indexes weren't stored in the same page.   */  #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 -struct blkif_request { -	uint8_t        operation;    /* BLKIF_OP_???                         */ -	uint8_t        nr_segments;  /* number of segments                   */ -	blkif_vdev_t   handle;       /* only for read/write requests         */ -	uint64_t       id;           /* private guest value, echoed in resp  */ -	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */ -	struct blkif_request_segment { +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 + +struct blkif_request_segment {  		grant_ref_t gref;        /* reference to I/O buffer frame        */  		/* @first_sect: first sector in frame to transfer (inclusive).   */  		/* @last_sect: last sector in frame to transfer (inclusive).     
*/  		uint8_t     first_sect, last_sect; -	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];  }; +struct blkif_request_rw { +	uint8_t        nr_segments;  /* number of segments                   */ +	blkif_vdev_t   handle;       /* only for read/write requests         */ +#ifndef CONFIG_X86_32 +	uint32_t       _pad1;	     /* offsetof(blkif_request,u.rw.id) == 8 */ +#endif +	uint64_t       id;           /* private guest value, echoed in resp  */ +	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */ +	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +} __attribute__((__packed__)); + +struct blkif_request_discard { +	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */ +#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */ +	blkif_vdev_t   _pad1;        /* only for read/write requests         */ +#ifndef CONFIG_X86_32 +	uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/ +#endif +	uint64_t       id;           /* private guest value, echoed in resp  */ +	blkif_sector_t sector_number; +	uint64_t       nr_sectors; +	uint8_t        _pad3; +} __attribute__((__packed__)); + +struct blkif_request_other { +	uint8_t      _pad1; +	blkif_vdev_t _pad2;        /* only for read/write requests         */ +#ifndef CONFIG_X86_32 +	uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8*/ +#endif +	uint64_t     id;           /* private guest value, echoed in resp  */ +} __attribute__((__packed__)); + +struct blkif_request_indirect { +	uint8_t        indirect_op; +	uint16_t       nr_segments; +#ifndef CONFIG_X86_32 +	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */ +#endif +	uint64_t       id; +	blkif_sector_t sector_number; +	blkif_vdev_t   handle; +	uint16_t       _pad2; +	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifndef CONFIG_X86_32 +	uint32_t      _pad3;         /* make it 64 byte aligned */ +#else +	uint64_t      _pad3;         
/* make it 64 byte aligned */ +#endif +} __attribute__((__packed__)); + +struct blkif_request { +	uint8_t        operation;    /* BLKIF_OP_???                         */ +	union { +		struct blkif_request_rw rw; +		struct blkif_request_discard discard; +		struct blkif_request_other other; +		struct blkif_request_indirect indirect; +	} u; +} __attribute__((__packed__)); +  struct blkif_response {  	uint64_t        id;              /* copied from request */  	uint8_t         operation;       /* copied from request */ @@ -91,4 +229,25 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);  #define VDISK_REMOVABLE    0x2  #define VDISK_READONLY     0x4 +/* Xen-defined major numbers for virtual disks, they look strangely + * familiar */ +#define XEN_IDE0_MAJOR	3 +#define XEN_IDE1_MAJOR	22 +#define XEN_SCSI_DISK0_MAJOR	8 +#define XEN_SCSI_DISK1_MAJOR	65 +#define XEN_SCSI_DISK2_MAJOR	66 +#define XEN_SCSI_DISK3_MAJOR	67 +#define XEN_SCSI_DISK4_MAJOR	68 +#define XEN_SCSI_DISK5_MAJOR	69 +#define XEN_SCSI_DISK6_MAJOR	70 +#define XEN_SCSI_DISK7_MAJOR	71 +#define XEN_SCSI_DISK8_MAJOR	128 +#define XEN_SCSI_DISK9_MAJOR	129 +#define XEN_SCSI_DISK10_MAJOR	130 +#define XEN_SCSI_DISK11_MAJOR	131 +#define XEN_SCSI_DISK12_MAJOR	132 +#define XEN_SCSI_DISK13_MAJOR	133 +#define XEN_SCSI_DISK14_MAJOR	134 +#define XEN_SCSI_DISK15_MAJOR	135 +  #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h index 518481c95f1..70054cc0708 100644 --- a/include/xen/interface/io/netif.h +++ b/include/xen/interface/io/netif.h @@ -9,8 +9,26 @@  #ifndef __XEN_PUBLIC_IO_NETIF_H__  #define __XEN_PUBLIC_IO_NETIF_H__ -#include "ring.h" -#include "../grant_table.h" +#include <xen/interface/io/ring.h> +#include <xen/interface/grant_table.h> + +/* + * Older implementation of Xen network frontend / backend has an + * implicit dependency on the MAX_SKB_FRAGS as the maximum number of + * ring slots a skb can use. 
Netfront / netback may not work as + * expected when frontend and backend have different MAX_SKB_FRAGS. + * + * A better approach is to add mechanism for netfront / netback to + * negotiate this value. However we cannot fix all possible + * frontends, so we need to define a value which states the minimum + * slots backend must support. + * + * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS + * (18), which is proved to work with most frontends. Any new backend + * which doesn't negotiate with frontend should expect frontend to + * send a valid packet using slots up to this value. + */ +#define XEN_NETIF_NR_SLOTS_MIN 18  /*   * Notifications after enqueuing any type of message should be conditional on @@ -20,52 +38,134 @@   * that it cannot safely queue packets (as it may not be kicked to send them).   */ + /* + * "feature-split-event-channels" is introduced to separate guest TX + * and RX notification. Backend either doesn't support this feature or + * advertises it via xenstore as 0 (disabled) or 1 (enabled). + * + * To make use of this feature, frontend should allocate two event + * channels for TX and RX, advertise them to backend as + * "event-channel-tx" and "event-channel-rx" respectively. If frontend + * doesn't want to use this feature, it just writes "event-channel" + * node as before. + */ + +/* + * Multiple transmit and receive queues: + * If supported, the backend will write the key "multi-queue-max-queues" to + * the directory for that vif, and set its value to the maximum supported + * number of queues. + * Frontends that are aware of this feature and wish to use it can write the + * key "multi-queue-num-queues", set to the number they wish to use, which + * must be greater than zero, and no more than the value reported by the backend + * in "multi-queue-max-queues". + * + * Queues replicate the shared rings and event channels.
+ * "feature-split-event-channels" may optionally be used when using + * multiple queues, but is not mandatory. + * + * Each queue consists of one shared ring pair, i.e. there must be the same + * number of tx and rx rings. + * + * For frontends requesting just one queue, the usual event-channel and + * ring-ref keys are written as before, simplifying the backend processing + * to avoid distinguishing between a frontend that doesn't understand the + * multi-queue feature, and one that does, but requested only one queue. + * + * Frontends requesting two or more queues must not write the toplevel + * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys, + * instead writing those keys under sub-keys having the name "queue-N" where + * N is the integer ID of the queue for which those keys belong. Queues + * are indexed from zero. For example, a frontend with two queues and split + * event channels must write the following set of queue-related keys: + * + * /local/domain/1/device/vif/0/multi-queue-num-queues = "2" + * /local/domain/1/device/vif/0/queue-0 = "" + * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>" + * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>" + * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>" + * /local/domain/1/device/vif/0/queue-1 = "" + * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>" + * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>" + * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>" + * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>" + * + * If there is any inconsistency in the XenStore data, the backend may + * choose not to connect any queues, instead treating the request as an + * error. This includes scenarios where more (or fewer) queues were + * requested than the frontend provided details for.
+ * + * Mapping of packets to queues is considered to be a function of the + * transmitting system (backend or frontend) and is not negotiated + * between the two. Guests are free to transmit packets on any queue + * they choose, provided it has been set up correctly. Guests must be + * prepared to receive packets on any queue they have requested be set up. + */ + +/* + * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum + * offload off or on. If it is missing then the feature is assumed to be on. + * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum + * offload on or off. If it is missing then the feature is assumed to be off. + */ + +/* + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither + * frontends nor backends are assumed to be capable unless the flags are + * present. + */ +  /*   * This is the 'wire' format for packets: - *  Request 1: netif_tx_request -- NETTXF_* (any flags) - * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info) - * [Request 3: netif_tx_extra]  (only if request 2 has XEN_NETIF_EXTRA_MORE) - *  Request 4: netif_tx_request -- NETTXF_more_data - *  Request 5: netif_tx_request -- NETTXF_more_data + *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags) + * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info) + * [Request 3: xen_netif_extra_info]    (only if request 2 has XEN_NETIF_EXTRA_MORE) + *  Request 4: xen_netif_tx_request  -- XEN_NETTXF_more_data + *  Request 5: xen_netif_tx_request  -- XEN_NETTXF_more_data   *  ... - *  Request N: netif_tx_request -- 0 + *  Request N: xen_netif_tx_request  -- 0   */  /* Protocol checksum field is blank in the packet (hardware offload)? 
*/ -#define _NETTXF_csum_blank     (0) -#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank) +#define _XEN_NETTXF_csum_blank		(0) +#define  XEN_NETTXF_csum_blank		(1U<<_XEN_NETTXF_csum_blank)  /* Packet data has been validated against protocol checksum. */ -#define _NETTXF_data_validated (1) -#define  NETTXF_data_validated (1U<<_NETTXF_data_validated) +#define _XEN_NETTXF_data_validated	(1) +#define  XEN_NETTXF_data_validated	(1U<<_XEN_NETTXF_data_validated)  /* Packet continues in the next request descriptor. */ -#define _NETTXF_more_data      (2) -#define  NETTXF_more_data      (1U<<_NETTXF_more_data) +#define _XEN_NETTXF_more_data		(2) +#define  XEN_NETTXF_more_data		(1U<<_XEN_NETTXF_more_data)  /* Packet to be followed by extra descriptor(s). */ -#define _NETTXF_extra_info     (3) -#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info) +#define _XEN_NETTXF_extra_info		(3) +#define  XEN_NETTXF_extra_info		(1U<<_XEN_NETTXF_extra_info) +#define XEN_NETIF_MAX_TX_SIZE 0xFFFF  struct xen_netif_tx_request {      grant_ref_t gref;      /* Reference to buffer page */      uint16_t offset;       /* Offset within buffer page */ -    uint16_t flags;        /* NETTXF_* */ +    uint16_t flags;        /* XEN_NETTXF_* */      uint16_t id;           /* Echoed in response message. */      uint16_t size;         /* Packet size in bytes.       */  }; -/* Types of netif_extra_info descriptors. */ -#define XEN_NETIF_EXTRA_TYPE_NONE  (0)  /* Never used - invalid */ -#define XEN_NETIF_EXTRA_TYPE_GSO   (1)  /* u.gso */ -#define XEN_NETIF_EXTRA_TYPE_MAX   (2) +/* Types of xen_netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE	(0)  /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO	(1)  /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MAX	(2) -/* netif_extra_info flags. */ -#define _XEN_NETIF_EXTRA_FLAG_MORE (0) -#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) +/* xen_netif_extra_info flags. 
*/ +#define _XEN_NETIF_EXTRA_FLAG_MORE	(0) +#define  XEN_NETIF_EXTRA_FLAG_MORE	(1U<<_XEN_NETIF_EXTRA_FLAG_MORE) -/* GSO types - only TCPv4 currently supported. */ -#define XEN_NETIF_GSO_TYPE_TCPV4        (1) +/* GSO types */ +#define XEN_NETIF_GSO_TYPE_NONE		(0) +#define XEN_NETIF_GSO_TYPE_TCPV4	(1) +#define XEN_NETIF_GSO_TYPE_TCPV6	(2)  /*   * This structure needs to fit within both netif_tx_request and @@ -107,7 +207,7 @@ struct xen_netif_extra_info {  struct xen_netif_tx_response {  	uint16_t id; -	int16_t  status;       /* NETIF_RSP_* */ +	int16_t  status;       /* XEN_NETIF_RSP_* */  };  struct xen_netif_rx_request { @@ -116,25 +216,29 @@ struct xen_netif_rx_request {  };  /* Packet data has been validated against protocol checksum. */ -#define _NETRXF_data_validated (0) -#define  NETRXF_data_validated (1U<<_NETRXF_data_validated) +#define _XEN_NETRXF_data_validated	(0) +#define  XEN_NETRXF_data_validated	(1U<<_XEN_NETRXF_data_validated)  /* Protocol checksum field is blank in the packet (hardware offload)? */ -#define _NETRXF_csum_blank     (1) -#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank) +#define _XEN_NETRXF_csum_blank		(1) +#define  XEN_NETRXF_csum_blank		(1U<<_XEN_NETRXF_csum_blank)  /* Packet continues in the next request descriptor. */ -#define _NETRXF_more_data      (2) -#define  NETRXF_more_data      (1U<<_NETRXF_more_data) +#define _XEN_NETRXF_more_data		(2) +#define  XEN_NETRXF_more_data		(1U<<_XEN_NETRXF_more_data)  /* Packet to be followed by extra descriptor(s). */ -#define _NETRXF_extra_info     (3) -#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info) +#define _XEN_NETRXF_extra_info		(3) +#define  XEN_NETRXF_extra_info		(1U<<_XEN_NETRXF_extra_info) + +/* GSO Prefix descriptor. 
*/ +#define _XEN_NETRXF_gso_prefix		(4) +#define  XEN_NETRXF_gso_prefix		(1U<<_XEN_NETRXF_gso_prefix)  struct xen_netif_rx_response {      uint16_t id;      uint16_t offset;       /* Offset in page of start of received packet  */ -    uint16_t flags;        /* NETRXF_* */ +    uint16_t flags;        /* XEN_NETRXF_* */      int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */  }; @@ -149,10 +253,10 @@ DEFINE_RING_TYPES(xen_netif_rx,  		  struct xen_netif_rx_request,  		  struct xen_netif_rx_response); -#define NETIF_RSP_DROPPED         -2 -#define NETIF_RSP_ERROR           -1 -#define NETIF_RSP_OKAY             0 -/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ -#define NETIF_RSP_NULL             1 +#define XEN_NETIF_RSP_DROPPED	-2 +#define XEN_NETIF_RSP_ERROR	-1 +#define XEN_NETIF_RSP_OKAY	 0 +/* No response: used for auxiliary requests (e.g., xen_netif_extra_info). */ +#define XEN_NETIF_RSP_NULL	 1  #endif diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h index 01fc8ae5f0b..545a14ba0bb 100644 --- a/include/xen/interface/io/protocols.h +++ b/include/xen/interface/io/protocols.h @@ -3,17 +3,17 @@  #define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"  #define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi" -#define XEN_IO_PROTO_ABI_IA64       "ia64-abi"  #define XEN_IO_PROTO_ABI_POWERPC64  "powerpc64-abi" +#define XEN_IO_PROTO_ABI_ARM        "arm-abi"  #if defined(__i386__)  # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32  #elif defined(__x86_64__)  # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 -#elif defined(__ia64__) -# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64  #elif defined(__powerpc64__)  # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 +#elif defined(__arm__) || defined(__aarch64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM  #else  # error arch fixup needed here  #endif diff --git a/include/xen/interface/io/ring.h 
b/include/xen/interface/io/ring.h index e8cbf431c8c..7d28aff605c 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -24,8 +24,15 @@ typedef unsigned int RING_IDX;   * A ring contains as many entries as will fit, rounded down to the nearest   * power of two (so we can mask with (size-1) to loop around).   */ -#define __RING_SIZE(_s, _sz) \ -    (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __CONST_RING_SIZE(_s, _sz)				\ +	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) /	\ +		sizeof(((struct _s##_sring *)0)->ring[0]))) + +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz)						\ +	(__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))  /*   * Macros to make the correct C datatypes for a new kind of ring. @@ -181,6 +188,11 @@ struct __name##_back_ring {						\  #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)				\      (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) +/* Ill-behaved frontend determination: Can there be this many requests? */ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)               \ +    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) + +  #define RING_PUSH_REQUESTS(_r) do {					\      wmb(); /* back sees requests /before/ updated producer index */	\      (_r)->sring->req_prod = (_r)->req_prod_pvt;				\ diff --git a/include/xen/interface/io/tpmif.h b/include/xen/interface/io/tpmif.h new file mode 100644 index 00000000000..28e7dcd75e8 --- /dev/null +++ b/include/xen/interface/io/tpmif.h @@ -0,0 +1,52 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes, v2 + * + * This file is in the public domain. + * + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +/* + * Xenbus state machine + * + * Device open: + *   1. Both ends start in XenbusStateInitialising + *   2. 
Backend transitions to InitWait (frontend does not wait on this step) + *   3. Frontend populates ring-ref, event-channel, feature-protocol-v2 + *   4. Frontend transitions to Initialised + *   5. Backend maps grant and event channel, verifies feature-protocol-v2 + *   6. Backend transitions to Connected + *   7. Frontend verifies feature-protocol-v2, transitions to Connected + * + * Device close: + *   1. State is changed to XenbusStateClosing + *   2. Frontend transitions to Closed + *   3. Backend unmaps grant and event, changes state to InitWait + */ + +enum vtpm_shared_page_state { +	VTPM_STATE_IDLE,         /* no contents / vTPM idle / cancel complete */ +	VTPM_STATE_SUBMIT,       /* request ready / vTPM working */ +	VTPM_STATE_FINISH,       /* response ready / vTPM idle */ +	VTPM_STATE_CANCEL,       /* cancel requested / vTPM working */ +}; +/* The backend should only change state to IDLE or FINISH, while the + * frontend should only change to SUBMIT or CANCEL. */ + + +struct vtpm_shared_page { +	uint32_t length;         /* request/response length in bytes */ + +	uint8_t state;           /* enum vtpm_shared_page_state */ +	uint8_t locality;        /* for the current request */ +	uint8_t pad; + +	uint8_t nr_extra_pages;  /* extra pages for long packets; may be zero */ +	uint32_t extra_pages[0]; /* grant IDs; length in nr_extra_pages */ +}; + +#endif diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index 99fcffb372d..794deb07eb5 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -26,7 +26,11 @@ enum xsd_sockmsg_type      XS_SET_PERMS,      XS_WATCH_EVENT,      XS_ERROR, -    XS_IS_DOMAIN_INTRODUCED +    XS_IS_DOMAIN_INTRODUCED, +    XS_RESUME, +    XS_SET_TARGET, +    XS_RESTRICT, +    XS_RESET_WATCHES,  };  #define XS_WRITE_NONE "NONE" @@ -84,4 +88,7 @@ struct xenstore_domain_interface {      XENSTORE_RING_IDX rsp_cons, rsp_prod;  }; +/* Violating this is very bad.  
See docs/misc/xenstore.txt. */ +#define XENSTORE_PAYLOAD_MAX 4096 +  #endif /* _XS_WIRE_H */ diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h index d7a6c13bde6..2ecfe4f700d 100644 --- a/include/xen/interface/memory.h +++ b/include/xen/interface/memory.h @@ -31,10 +31,10 @@ struct xen_memory_reservation {       *   OUT: GMFN bases of extents that were allocated       *   (NB. This command also updates the mach_to_phys translation table)       */ -    GUEST_HANDLE(ulong) extent_start; +    GUEST_HANDLE(xen_pfn_t) extent_start;      /* Number of extents, and size/alignment of each (2^extent_order pages). */ -    unsigned long  nr_extents; +    xen_ulong_t  nr_extents;      unsigned int   extent_order;      /* @@ -92,7 +92,7 @@ struct xen_memory_exchange {       *     command will be non-zero.       *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!       */ -    unsigned long nr_exchanged; +    xen_ulong_t nr_exchanged;  };  DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); @@ -130,7 +130,7 @@ struct xen_machphys_mfn_list {       * any large discontiguities in the machine address space, 2MB gaps in       * the machphys table will be represented by an MFN base of zero.       */ -    GUEST_HANDLE(ulong) extent_start; +    GUEST_HANDLE(xen_pfn_t) extent_start;      /*       * Number of extents written to the above array. This will be smaller @@ -141,6 +141,27 @@ struct xen_machphys_mfn_list {  DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);  /* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping     12 +struct xen_machphys_mapping { +    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */ +    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. 
*/ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); + +#define XENMAPSPACE_shared_info  0 /* shared info page */ +#define XENMAPSPACE_grant_table  1 /* grant table page */ +#define XENMAPSPACE_gmfn         2 /* GMFN */ +#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */ +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, +				    * XENMEM_add_to_physmap_range only. +				    */ + +/*   * Sets the GPFN at which a particular page appears in the specified guest's   * pseudophysical address space.   * arg == addr of xen_add_to_physmap_t. @@ -150,41 +171,46 @@ struct xen_add_to_physmap {      /* Which domain to change the mapping for. */      domid_t domid; +    /* Number of pages to go through for gmfn_range */ +    uint16_t    size; +      /* Source mapping space. */ -#define XENMAPSPACE_shared_info 0 /* shared info page */ -#define XENMAPSPACE_grant_table 1 /* grant table page */      unsigned int space;      /* Index into source mapping space. */ -    unsigned long idx; +    xen_ulong_t idx;      /* GPFN where the source mapping page should appear. */ -    unsigned long gpfn; +    xen_pfn_t gpfn;  };  DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); -/* - * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error - * code on failure. This call only works for auto-translated guests. - */ -#define XENMEM_translate_gpfn_list  8 -struct xen_translate_gpfn_list { -    /* Which domain to translate for? */ +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list  8*/ + +#define XENMEM_add_to_physmap_range 23 +struct xen_add_to_physmap_range { +    /* IN */ +    /* Which domain to change the mapping for. */      domid_t domid; +    uint16_t space; /* => enum phys_map_space */ -    /* Length of list. */ -    unsigned long nr_gpfns; +    /* Number of pages to go through */ +    uint16_t size; +    domid_t foreign_domid; /* IFF gmfn_foreign */ -    /* List of GPFNs to translate. 
*/ -    GUEST_HANDLE(ulong) gpfn_list; +    /* Indexes into space being mapped. */ +    GUEST_HANDLE(xen_ulong_t) idxs; -    /* -     * Output list to contain MFN translations. May be the same as the input -     * list (in which case each input GPFN is overwritten with the output MFN). -     */ -    GUEST_HANDLE(ulong) mfn_list; +    /* GPFN in domid where the source mapping page should appear. */ +    GUEST_HANDLE(xen_pfn_t) gpfns; + +    /* OUT */ + +    /* Per index error code. */ +    GUEST_HANDLE(int) errs;  }; -DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);  /*   * Returns the pseudo-physical memory map as it was when the domain @@ -221,4 +247,20 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);   * during a driver critical region.   */  extern spinlock_t xen_reservation_lock; + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap      15 +struct xen_remove_from_physmap { +    /* Which domain to change the mapping for. */ +    domid_t domid; + +    /* GPFN of the current mapping of the page. */ +    xen_pfn_t gpfn; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); +  #endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 2b2c66c3df0..610dba9b620 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -39,6 +39,27 @@ struct physdev_eoi {  };  /* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. 
+ */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1       17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2       28 +struct physdev_pirq_eoi_gmfn { +    /* IN */ +    xen_ulong_t gmfn; +}; + +/*   * Query the status of an IRQ line.   * @arg == pointer to physdev_irq_status_query structure.   */ @@ -109,6 +130,8 @@ struct physdev_irq {  #define MAP_PIRQ_TYPE_MSI		0x0  #define MAP_PIRQ_TYPE_GSI		0x1  #define MAP_PIRQ_TYPE_UNKNOWN		0x2 +#define MAP_PIRQ_TYPE_MSI_SEG		0x3 +#define MAP_PIRQ_TYPE_MULTI_MSI		0x4  #define PHYSDEVOP_map_pirq		13  struct physdev_map_pirq { @@ -119,11 +142,16 @@ struct physdev_map_pirq {      int index;      /* IN or OUT */      int pirq; -    /* IN */ +    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */      int bus;      /* IN */      int devfn; -    /* IN */ +    /* IN +     * - For MSI-X contains entry number. +     * - For MSI with ..._MULTI_MSI contains number of vectors. +     * OUT (..._MULTI_MSI only) +     * - Number of vectors allocated. 
+     */      int entry_nr;      /* IN */      uint64_t table_base; @@ -144,6 +172,13 @@ struct physdev_manage_pci {  	uint8_t devfn;  }; +#define PHYSDEVOP_restore_msi            19 +struct physdev_restore_msi { +	/* IN */ +	uint8_t bus; +	uint8_t devfn; +}; +  #define PHYSDEVOP_manage_pci_add_ext	20  struct physdev_manage_pci_ext {  	/* IN */ @@ -188,6 +223,80 @@ struct physdev_nr_pirqs {      uint32_t nr_pirqs;  }; +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq    23 +struct physdev_get_free_pirq { +    /* IN */  +    int type; +    /* OUT */ +    uint32_t pirq; +}; + +#define XEN_PCI_DEV_EXTFN              0x1 +#define XEN_PCI_DEV_VIRTFN             0x2 +#define XEN_PCI_DEV_PXM                0x4 + +#define XEN_PCI_MMCFG_RESERVED         0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved    24 +struct physdev_pci_mmcfg_reserved { +    uint64_t address; +    uint16_t segment; +    uint8_t start_bus; +    uint8_t end_bus; +    uint32_t flags; +}; + +#define PHYSDEVOP_pci_device_add        25 +struct physdev_pci_device_add { +    /* IN */ +    uint16_t seg; +    uint8_t bus; +    uint8_t devfn; +    uint32_t flags; +    struct { +        uint8_t bus; +        uint8_t devfn; +    } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +    uint32_t optarr[]; +#elif defined(__GNUC__) +    uint32_t optarr[0]; +#endif +}; + +#define PHYSDEVOP_pci_device_remove     26 +#define PHYSDEVOP_restore_msi_ext       27 +/* + * Dom0 should use these two to announce MMIO resources assigned to + * MSI-X capable devices won't (prepare) or may (release) change. 
+ */ +#define PHYSDEVOP_prepare_msix          30 +#define PHYSDEVOP_release_msix          31 +struct physdev_pci_device { +    /* IN */ +    uint16_t seg; +    uint8_t bus; +    uint8_t devfn; +}; + +#define PHYSDEVOP_DBGP_RESET_PREPARE    1 +#define PHYSDEVOP_DBGP_RESET_DONE       2 + +#define PHYSDEVOP_DBGP_BUS_UNKNOWN      0 +#define PHYSDEVOP_DBGP_BUS_PCI          1 + +#define PHYSDEVOP_dbgp_op               29 +struct physdev_dbgp_op { +    /* IN */ +    uint8_t op; +    uint8_t bus; +    union { +        struct physdev_pci_device pci; +    } u; +}; +  /*   * Notify that some PIRQ-bound event channels have been unmasked.   * ** This command is obsolete since interface version 0x00030202 and is ** diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h new file mode 100644 index 00000000000..f1331e3e727 --- /dev/null +++ b/include/xen/interface/platform.h @@ -0,0 +1,380 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include <xen/interface/xen.h> + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC, + * 1 January, 1970 if the current system time was <system_time>. + */ +#define XENPF_settime             17 +struct xenpf_settime { +	/* IN variables. */ +	uint32_t secs; +	uint32_t nsecs; +	uint64_t system_time; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype         31 +struct xenpf_add_memtype { +	/* IN variables. */ +	xen_pfn_t mfn; +	uint64_t nr_mfns; +	uint32_t type; +	/* OUT variables. */ +	uint32_t handle; +	uint32_t reg; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype         32 +struct xenpf_del_memtype { +	/* IN variables. */ +	uint32_t handle; +	uint32_t reg; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). 
*/ +#define XENPF_read_memtype        33 +struct xenpf_read_memtype { +	/* IN variables. */ +	uint32_t reg; +	/* OUT variables. */ +	xen_pfn_t mfn; +	uint64_t nr_mfns; +	uint32_t type; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_read_memtype_t); + +#define XENPF_microcode_update    35 +struct xenpf_microcode_update { +	/* IN variables. */ +	GUEST_HANDLE(void) data;          /* Pointer to microcode data */ +	uint32_t length;                  /* Length of microcode data. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_microcode_update_t); + +#define XENPF_platform_quirk      39 +#define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */ +#define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */ +struct xenpf_platform_quirk { +	/* IN variables. */ +	uint32_t quirk_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t); + +#define XENPF_firmware_info       50 +#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */ +#define XEN_FW_KBD_SHIFT_FLAGS    5 /* Int16, Fn02: Get keyboard shift flags. */ +struct xenpf_firmware_info { +	/* IN variables. */ +	uint32_t type; +	uint32_t index; +	/* OUT variables. */ +	union { +		struct { +			/* Int13, Fn48: Check Extensions Present. */ +			uint8_t device;                   /* %dl: bios device number */ +			uint8_t version;                  /* %ah: major version      */ +			uint16_t interface_support;       /* %cx: support bitmap     */ +			/* Int13, Fn08: Legacy Get Device Parameters. */ +			uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */ +			uint8_t legacy_max_head;          /* %dh: max head #         */ +			uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */ +			/* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ +			/* NB. 
First uint16_t of buffer must be set to buffer size.      */ +			GUEST_HANDLE(void) edd_params; +		} disk_info; /* XEN_FW_DISK_INFO */ +		struct { +			uint8_t device;                   /* bios device number  */ +			uint32_t mbr_signature;           /* offset 0x1b8 in mbr */ +		} disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ +		struct { +			/* Int10, AX=4F15: Get EDID info. */ +			uint8_t capabilities; +			uint8_t edid_transfer_time; +			/* must refer to 128-byte buffer */ +			GUEST_HANDLE(uchar) edid; +		} vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + +		uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */ +	} u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep    51 +struct xenpf_enter_acpi_sleep { +	/* IN variables */ +	uint16_t val_a;             /* PM1a control / sleep type A. */ +	uint16_t val_b;             /* PM1b control / sleep type B. */ +	uint32_t sleep_state;       /* Which state to enter (Sn). */ +#define XENPF_ACPI_SLEEP_EXTENDED 0x00000001 +	uint32_t flags;             /* XENPF_ACPI_SLEEP_*. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq         52 +struct xenpf_change_freq { +	/* IN variables */ +	uint32_t flags; /* Must be zero. */ +	uint32_t cpu;   /* Physical cpu. */ +	uint64_t freq;  /* New frequency (Hz). */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. 
+ */ +#define XENPF_getidletime         53 +struct xenpf_getidletime { +	/* IN/OUT variables */ +	/* IN: CPUs to interrogate; OUT: subset of IN which are present */ +	GUEST_HANDLE(uchar) cpumap_bitmap; +	/* IN variables */ +	/* Size of cpumap bitmap. */ +	uint32_t cpumap_nr_cpus; +	/* Must be indexable for every cpu in cpumap_bitmap. */ +	GUEST_HANDLE(uint64_t) idletime; +	/* OUT variables */ +	/* System time when the idletime snapshots were taken. */ +	uint64_t now; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo      54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX	1 +#define XEN_PROCESSOR_PM_PX	2 +#define XEN_PROCESSOR_PM_TX	4 + +/* cmd type */ +#define XEN_PM_CX   0 +#define XEN_PM_PX   1 +#define XEN_PM_TX   2 +#define XEN_PM_PDC  3 +/* Px sub info type */ +#define XEN_PX_PCT   1 +#define XEN_PX_PSS   2 +#define XEN_PX_PPC   4 +#define XEN_PX_PSD   8 + +struct xen_power_register { +	uint32_t     space_id; +	uint32_t     bit_width; +	uint32_t     bit_offset; +	uint32_t     access_size; +	uint64_t     address; +}; + +struct xen_processor_csd { +	uint32_t    domain;      /* domain number of one dependent group */ +	uint32_t    coord_type;  /* coordination type */ +	uint32_t    num;         /* number of processors in same domain */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_csd); + +struct xen_processor_cx { +	struct xen_power_register  reg; /* GAS for Cx trigger register */ +	uint8_t     type;     /* cstate value, c0: 0, c1: 1, ... 
*/ +	uint32_t    latency;  /* worst latency (ms) to enter/exit this cstate */ +	uint32_t    power;    /* average power consumption(mW) */ +	uint32_t    dpcnt;    /* number of dependency entries */ +	GUEST_HANDLE(xen_processor_csd) dp; /* NULL if no dependency */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_cx); + +struct xen_processor_flags { +	uint32_t bm_control:1; +	uint32_t bm_check:1; +	uint32_t has_cst:1; +	uint32_t power_setup_done:1; +	uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { +	uint32_t count;  /* number of C state entries in array below */ +	struct xen_processor_flags flags;  /* global flags of this processor */ +	GUEST_HANDLE(xen_processor_cx) states; /* supported c states */ +}; + +struct xen_pct_register { +	uint8_t  descriptor; +	uint16_t length; +	uint8_t  space_id; +	uint8_t  bit_width; +	uint8_t  bit_offset; +	uint8_t  reserved; +	uint64_t address; +}; + +struct xen_processor_px { +	uint64_t core_frequency; /* megahertz */ +	uint64_t power;      /* milliWatts */ +	uint64_t transition_latency; /* microseconds */ +	uint64_t bus_master_latency; /* microseconds */ +	uint64_t control;        /* control value */ +	uint64_t status;     /* success indicator */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_px); + +struct xen_psd_package { +	uint64_t num_entries; +	uint64_t revision; +	uint64_t domain; +	uint64_t coord_type; +	uint64_t num_processors; +}; + +struct xen_processor_performance { +	uint32_t flags;     /* flag for Px sub info type */ +	uint32_t platform_limit;  /* Platform limitation on freq usage */ +	struct xen_pct_register control_register; +	struct xen_pct_register status_register; +	uint32_t state_count;     /* total available performance states */ +	GUEST_HANDLE(xen_processor_px) states; +	struct xen_psd_package domain_info; +	uint32_t shared_type;     /* coordination type of this processor */ +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance); + +struct xenpf_set_processor_pminfo { +	/* IN variables */ +	uint32_t 
id;    /* ACPI CPU ID */ +	uint32_t type;  /* {XEN_PM_CX, XEN_PM_PX} */ +	union { +		struct xen_processor_power          power;/* Cx: _CST/_CSD */ +		struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */ +		GUEST_HANDLE(uint32_t)              pdc; +	}; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo); + +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { +	/* IN */ +	uint32_t xen_cpuid; +	/* OUT */ +	/* The maximum cpu_id that is present */ +	uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE   1 +	/* Corresponding xen_cpuid is not present */ +#define XEN_PCPU_FLAGS_INVALID  2 +	uint32_t flags; +	uint32_t apic_id; +	uint32_t acpi_id; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_pcpuinfo); + +#define XENPF_cpu_online	56 +#define XENPF_cpu_offline	57 +struct xenpf_cpu_ol { +	uint32_t cpuid; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_cpu_ol); + +#define XENPF_cpu_hotadd	58 +struct xenpf_cpu_hotadd { +	uint32_t apic_id; +	uint32_t acpi_id; +	uint32_t pxm; +}; + +#define XENPF_mem_hotadd	59 +struct xenpf_mem_hotadd { +	uint64_t spfn; +	uint64_t epfn; +	uint32_t pxm; +	uint32_t flags; +}; + +#define XENPF_core_parking     60 +struct xenpf_core_parking { +	/* IN variables */ +#define XEN_CORE_PARKING_SET   1 +#define XEN_CORE_PARKING_GET   2 +	uint32_t type; +	/* IN variables:  set cpu nums expected to be idled */ +	/* OUT variables: get cpu nums actually be idled */ +	uint32_t idle_nums; +}; +DEFINE_GUEST_HANDLE_STRUCT(xenpf_core_parking); + +struct xen_platform_op { +	uint32_t cmd; +	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ +	union { +		struct xenpf_settime           settime; +		struct xenpf_add_memtype       add_memtype; +		struct xenpf_del_memtype       del_memtype; +		struct xenpf_read_memtype      read_memtype; +		struct xenpf_microcode_update  microcode; +		struct xenpf_platform_quirk    platform_quirk; +		struct xenpf_firmware_info     firmware_info; +		struct xenpf_enter_acpi_sleep  enter_acpi_sleep; +		struct xenpf_change_freq 
      change_freq; +		struct xenpf_getidletime       getidletime; +		struct xenpf_set_processor_pminfo set_pminfo; +		struct xenpf_pcpuinfo          pcpu_info; +		struct xenpf_cpu_ol            cpu_ol; +		struct xenpf_cpu_hotadd        cpu_add; +		struct xenpf_mem_hotadd        mem_add; +		struct xenpf_core_parking      core_parking; +		uint8_t                        pad[128]; +	} u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h index 5fec575a800..9ce083960a2 100644 --- a/include/xen/interface/sched.h +++ b/include/xen/interface/sched.h @@ -9,7 +9,7 @@  #ifndef __XEN_PUBLIC_SCHED_H__  #define __XEN_PUBLIC_SCHED_H__ -#include "event_channel.h" +#include <xen/interface/event_channel.h>  /*   * The prototype for this hypercall is: @@ -65,6 +65,39 @@ struct sched_poll {  DEFINE_GUEST_HANDLE_STRUCT(sched_poll);  /* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown        4 +struct sched_remote_shutdown { +    domid_t domain_id;         /* Remote domain ID */ +    unsigned int reason;       /* SHUTDOWN_xxx reason */ +}; + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + *               after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. 
+ */ +#define SCHEDOP_watchdog    6 +struct sched_watchdog { +    uint32_t id;                /* watchdog ID */ +    uint32_t timeout;           /* timeout */ +}; + +/*   * Reason codes for SCHEDOP_shutdown. These may be interpreted by control   * software to determine the appropriate action. For the most part, Xen does   * not care about the shutdown code. @@ -73,5 +106,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll);  #define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */  #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */  #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */ +#define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */  #endif /* __XEN_PUBLIC_SCHED_H__ */ diff --git a/include/xen/interface/vcpu.h b/include/xen/interface/vcpu.h index 87e6f8a4866..b05288ce399 100644 --- a/include/xen/interface/vcpu.h +++ b/include/xen/interface/vcpu.h @@ -170,4 +170,6 @@ struct vcpu_register_vcpu_info {  };  DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi             11  #endif /* __XEN_PUBLIC_VCPU_H__ */ diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h index e8b6519d47e..7ff6498679a 100644 --- a/include/xen/interface/version.h +++ b/include/xen/interface/version.h @@ -45,7 +45,7 @@ struct xen_changeset_info {  #define XENVER_platform_parameters 5  struct xen_platform_parameters { -    unsigned long virt_start; +    xen_ulong_t virt_start;  };  #define XENVER_get_features 6 @@ -55,9 +55,12 @@ struct xen_feature_info {  };  /* Declares the features reported by XENVER_get_features. */ -#include "features.h" +#include <xen/interface/features.h>  /* arg == NULL; returns host memory page size. */  #define XENVER_pagesize 7 +/* arg == xen_domain_handle_t. 
*/ +#define XENVER_guest_handle 8 +  #endif /* __XEN_PUBLIC_VERSION_H__ */ diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h new file mode 100644 index 00000000000..73a4ea714d9 --- /dev/null +++ b/include/xen/interface/xen-mca.h @@ -0,0 +1,385 @@ +/****************************************************************************** + * arch-x86/mca.h + * Guest OS machine check interface to x86 Xen. + * + * Contributed by Advanced Micro Devices, Inc. + * Author: Christoph Egger <Christoph.Egger@amd.com> + * + * Updated by Intel Corporation + * Author: Liu, Jinsong <jinsong.liu@intel.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ +#define __XEN_PUBLIC_ARCH_X86_MCA_H__ + +/* Hypercall */ +#define __HYPERVISOR_mca __HYPERVISOR_arch_0 + +#define XEN_MCA_INTERFACE_VERSION	0x01ecc003 + +/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */ +#define XEN_MC_NONURGENT	0x1 +/* IN: Dom0 calls hypercall to retrieve urgent error log entry */ +#define XEN_MC_URGENT		0x2 +/* IN: Dom0 acknowledges previously-fetched error log entry */ +#define XEN_MC_ACK		0x4 + +/* OUT: All is ok */ +#define XEN_MC_OK		0x0 +/* OUT: Domain could not fetch data. */ +#define XEN_MC_FETCHFAILED	0x1 +/* OUT: There was no machine check data to fetch. */ +#define XEN_MC_NODATA		0x2 + +#ifndef __ASSEMBLY__ +/* vIRQ injected to Dom0 */ +#define VIRQ_MCA VIRQ_ARCH_0 + +/* + * mc_info entry types + * Machine check (MCA) information is recorded in mc_info entries. + * When fetching MCA information, use MC_TYPE_... to distinguish + * the different kinds of entry. + */ +#define MC_TYPE_GLOBAL		0 +#define MC_TYPE_BANK		1 +#define MC_TYPE_EXTENDED	2 +#define MC_TYPE_RECOVERY	3 + +struct mcinfo_common { +	uint16_t type; /* structure type */ +	uint16_t size; /* size of this struct in bytes */ +}; + +#define MC_FLAG_CORRECTABLE	(1 << 0) +#define MC_FLAG_UNCORRECTABLE	(1 << 1) +#define MC_FLAG_RECOVERABLE	(1 << 2) +#define MC_FLAG_POLLED		(1 << 3) +#define MC_FLAG_RESET		(1 << 4) +#define MC_FLAG_CMCI		(1 << 5) +#define MC_FLAG_MCE		(1 << 6) + +/* contains x86 global mc information */ +struct mcinfo_global { +	struct mcinfo_common common; + +	uint16_t mc_domid; /* running domain at the time in error */ +	uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ +	uint32_t mc_socketid; /* physical socket of the physical core */ +	uint16_t mc_coreid; /* physical impacted core */ +	uint16_t mc_core_threadid; /* core thread of physical core */ +	uint32_t mc_apicid; +	uint32_t mc_flags; +	uint64_t mc_gstatus; /* global status */ +}; + +/* contains x86 bank mc information */ +struct mcinfo_bank { +	struct 
mcinfo_common common; + +	uint16_t mc_bank; /* bank nr */ +	uint16_t mc_domid; /* domain referenced by mc_addr if valid */ +	uint64_t mc_status; /* bank status */ +	uint64_t mc_addr; /* bank address */ +	uint64_t mc_misc; +	uint64_t mc_ctrl2; +	uint64_t mc_tsc; +}; + +struct mcinfo_msr { +	uint64_t reg; /* MSR */ +	uint64_t value; /* MSR value */ +}; + +/* contains mc information from other or additional mc MSRs */ +struct mcinfo_extended { +	struct mcinfo_common common; +	uint32_t mc_msrs; /* Number of msr with valid values. */ +	/* +	 * Currently Intel extended MSR (32/64) include all gp registers +	 * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be +	 * useful at present. So expand this array to 16/32 to leave room. +	 */ +	struct mcinfo_msr mc_msr[sizeof(void *) * 4]; +}; + +/* Recovery Action flags. Giving recovery result information to DOM0 */ + +/* Xen takes successful recovery action, the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some case */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* + * Different Recovery Action types, if the action is performed successfully, + * REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* + * Below interface used between XEN/DOM0 for passing XEN's recovery action + * information to DOM0. 
+ */ +struct page_offline_action { +	/* Params for passing the offlined page number to DOM0 */ +	uint64_t mfn; +	uint64_t status; +}; + +struct cpu_offline_action { +	/* Params for passing the identity of the offlined CPU to DOM0 */ +	uint32_t mc_socketid; +	uint16_t mc_coreid; +	uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery { +	struct mcinfo_common common; +	uint16_t mc_bank; /* bank nr */ +	uint8_t action_flags; +	uint8_t action_types; +	union { +		struct page_offline_action page_retire; +		struct cpu_offline_action cpu_offline; +		uint8_t pad[MAX_UNION_SIZE]; +	} action_info; +}; + + +#define MCINFO_MAXSIZE 768 +struct mc_info { +	/* Number of mcinfo_* entries in mi_data */ +	uint32_t mi_nentries; +	uint32_t flags; +	uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; +}; +DEFINE_GUEST_HANDLE_STRUCT(mc_info); + +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_MSR_MCGCAP 0 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 +struct mcinfo_logical_cpu { +	uint32_t mc_cpunr; +	uint32_t mc_chipid; +	uint16_t mc_coreid; +	uint16_t mc_threadid; +	uint32_t mc_apicid; +	uint32_t mc_clusterid; +	uint32_t mc_ncores; +	uint32_t mc_ncores_active; +	uint32_t mc_nthreads; +	uint32_t mc_cpuid_level; +	uint32_t mc_family; +	uint32_t mc_vendor; +	uint32_t mc_model; +	uint32_t mc_step; +	char mc_vendorid[16]; +	char mc_brandid[64]; +	uint32_t mc_cpu_caps[MC_NCAPS]; +	uint32_t mc_cache_size; +	uint32_t mc_cache_alignment; +	uint32_t mc_nmsrvals; +	struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu); + +/* + * Prototype: + *    uint32_t x86_mcinfo_nentries(struct mc_info *mi); + */ +#define x86_mcinfo_nentries(_mi)    \ +	((_mi)->mi_nentries) +/* + * Prototype: + *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); + */ +#define x86_mcinfo_first(_mi)       \ +	((struct mcinfo_common *)(_mi)->mi_data) +/* + * Prototype: + *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); + */ +#define 
x86_mcinfo_next(_mic)       \ +	((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) + +/* + * Prototype: + *    void x86_mcinfo_lookup(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type); + */ +static inline void x86_mcinfo_lookup(struct mcinfo_common **ret, +				     struct mc_info *mi, uint16_t type) +{ +	uint32_t i; +	struct mcinfo_common *mic; +	bool found = 0; + +	if (!ret || !mi) +		return; + +	mic = x86_mcinfo_first(mi); +	for (i = 0; i < x86_mcinfo_nentries(mi); i++) { +		if (mic->type == type) { +			found = 1; +			break; +		} +		mic = x86_mcinfo_next(mic); +	} + +	*ret = found ? mic : NULL; +} + +/* + * Fetch machine check data from hypervisor. + */ +#define XEN_MC_fetch		1 +struct xen_mc_fetch { +	/* +	 * IN: XEN_MC_NONURGENT, XEN_MC_URGENT, +	 * XEN_MC_ACK if ack'ing an earlier fetch +	 * OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA +	 */ +	uint32_t flags; +	uint32_t _pad0; +	/* OUT: id for ack, IN: id we are ack'ing */ +	uint64_t fetch_id; + +	/* OUT variables. */ +	GUEST_HANDLE(mc_info) data; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch); + + +/* + * This tells the hypervisor to notify a DomU about the machine check error + */ +#define XEN_MC_notifydomain	2 +struct xen_mc_notifydomain { +	/* IN variables */ +	uint16_t mc_domid; /* The unprivileged domain to notify */ +	uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify */ + +	/* IN/OUT variables */ +	uint32_t flags; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain); + +#define XEN_MC_physcpuinfo	3 +struct xen_mc_physcpuinfo { +	/* IN/OUT */ +	uint32_t ncpus; +	uint32_t _pad0; +	/* OUT */ +	GUEST_HANDLE(mcinfo_logical_cpu) info; +}; + +#define XEN_MC_msrinject	4 +#define MC_MSRINJ_MAXMSRS	8 +struct xen_mc_msrinject { +	/* IN */ +	uint32_t mcinj_cpunr; /* target processor id */ +	uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ +	uint32_t mcinj_count; /* 0 .. 
count-1 in array are valid */ +	uint32_t _pad0; +	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE	0x1 + +#define XEN_MC_mceinject	5 +struct xen_mc_mceinject { +	unsigned int mceinj_cpunr; /* target processor id */ +}; + +struct xen_mc { +	uint32_t cmd; +	uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ +	union { +		struct xen_mc_fetch        mc_fetch; +		struct xen_mc_notifydomain mc_notifydomain; +		struct xen_mc_physcpuinfo  mc_physcpuinfo; +		struct xen_mc_msrinject    mc_msrinject; +		struct xen_mc_mceinject    mc_mceinject; +	} u; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_mc); + +/* Fields are zero when not available */ +struct xen_mce { +	__u64 status; +	__u64 misc; +	__u64 addr; +	__u64 mcgstatus; +	__u64 ip; +	__u64 tsc;	/* cpu time stamp counter */ +	__u64 time;	/* wall time_t when error was detected */ +	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */ +	__u8  inject_flags;	/* software inject flags */ +	__u16  pad; +	__u32 cpuid;	/* CPUID 1 EAX */ +	__u8  cs;		/* code segment */ +	__u8  bank;	/* machine check bank */ +	__u8  cpu;	/* cpu number; obsolete; use extcpu now */ +	__u8  finished;   /* entry is valid */ +	__u32 extcpu;	/* linux cpu number that detected the error */ +	__u32 socketid;	/* CPU socket ID */ +	__u32 apicid;	/* CPU initial apic ID */ +	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */ +}; + +/* + * This structure contains all data related to the MCE log.  Also + * carries a signature to make it easier to find from external + * debugging tools.  Each entry is only valid when its finished flag + * is set. 
+ */ + +#define XEN_MCE_LOG_LEN 32 + +struct xen_mce_log { +	char signature[12]; /* "MACHINECHECK" */ +	unsigned len;	    /* = XEN_MCE_LOG_LEN */ +	unsigned next; +	unsigned flags; +	unsigned recordlen;	/* length of struct xen_mce */ +	struct xen_mce entry[XEN_MCE_LOG_LEN]; +}; + +#define XEN_MCE_OVERFLOW 0		/* bit 0 in flags means overflow */ + +#define XEN_MCE_LOG_SIGNATURE	"MACHINECHECK" + +#define MCE_GET_RECORD_LEN   _IOR('M', 1, int) +#define MCE_GET_LOG_LEN      _IOR('M', 2, int) +#define MCE_GETCLEAR_FLAGS   _IOR('M', 3, int) + +#endif /* __ASSEMBLY__ */ +#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 2befa3e2f1b..de082130ba4 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -10,7 +10,6 @@  #define __XEN_PUBLIC_XEN_H__  #include <asm/xen/interface.h> -#include <asm/pvclock-abi.h>  /*   * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). @@ -30,7 +29,7 @@  #define __HYPERVISOR_stack_switch          3  #define __HYPERVISOR_set_callbacks         4  #define __HYPERVISOR_fpu_taskswitch        5 -#define __HYPERVISOR_sched_op              6 +#define __HYPERVISOR_sched_op_compat       6  #define __HYPERVISOR_dom0_op               7  #define __HYPERVISOR_set_debugreg          8  #define __HYPERVISOR_get_debugreg          9 @@ -52,12 +51,13 @@  #define __HYPERVISOR_mmuext_op            26  #define __HYPERVISOR_acm_op               27  #define __HYPERVISOR_nmi_op               28 -#define __HYPERVISOR_sched_op_new         29 +#define __HYPERVISOR_sched_op             29  #define __HYPERVISOR_callback_op          30  #define __HYPERVISOR_xenoprof_op          31  #define __HYPERVISOR_event_channel_op     32  #define __HYPERVISOR_physdev_op           33  #define __HYPERVISOR_hvm_op               34 +#define __HYPERVISOR_tmem_op              38  /* Architecture-specific hypercall definitions. 
*/  #define __HYPERVISOR_arch_0               48 @@ -79,6 +79,7 @@  #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */ +#define VIRQ_PCPU_STATE 9  /* (DOM0) PCPU state changed                   */  /* Architecture-specific VIRQ definitions. */  #define VIRQ_ARCH_0    16 @@ -188,7 +189,7 @@ struct mmuext_op {  	unsigned int cmd;  	union {  		/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ -		unsigned long mfn; +		xen_pfn_t mfn;  		/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */  		unsigned long linear_addr;  	} arg1; @@ -274,18 +275,12 @@ DEFINE_GUEST_HANDLE_STRUCT(mmu_update);   * NB. The fields are natural register size for this architecture.   */  struct multicall_entry { -    unsigned long op; -    long result; -    unsigned long args[6]; +    xen_ulong_t op; +    xen_long_t result; +    xen_ulong_t args[6];  };  DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); -/* - * Event channel endpoints per domain: - *  1024 if a long is 32 bits; 4096 if a long is 64 bits. - */ -#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) -  struct vcpu_time_info {  	/*  	 * Updates to the following values are preceded and followed @@ -340,7 +335,7 @@ struct vcpu_info {  	 */  	uint8_t evtchn_upcall_pending;  	uint8_t evtchn_upcall_mask; -	unsigned long evtchn_pending_sel; +	xen_ulong_t evtchn_pending_sel;  	struct arch_vcpu_info arch;  	struct pvclock_vcpu_time_info time;  }; /* 64 bytes (x86) */ @@ -383,8 +378,8 @@ struct shared_info {  	 * per-vcpu selector word to be set. Each bit in the selector covers a  	 * 'C long' in the PENDING bitfield array.  	 
*/ -	unsigned long evtchn_pending[sizeof(unsigned long) * 8]; -	unsigned long evtchn_mask[sizeof(unsigned long) * 8]; +	xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; +	xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];  	/*  	 * Wallclock time: updated only by control software. Guests should base @@ -428,11 +423,11 @@ struct start_info {  	unsigned long nr_pages;     /* Total pages allocated to this domain.  */  	unsigned long shared_info;  /* MACHINE address of shared info struct. */  	uint32_t flags;             /* SIF_xxx flags.                         */ -	unsigned long store_mfn;    /* MACHINE page number of shared page.    */ +	xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */  	uint32_t store_evtchn;      /* Event channel for store communication. */  	union {  		struct { -			unsigned long mfn;  /* MACHINE page number of console page.   */ +			xen_pfn_t mfn;      /* MACHINE page number of console page.   */  			uint32_t  evtchn;   /* Event channel for console page.        */  		} domU;  		struct { @@ -449,9 +444,50 @@ struct start_info {  	int8_t cmd_line[MAX_GUEST_CMDLINE];  }; +struct dom0_vga_console_info { +	uint8_t video_type; +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB    0x23 +#define XEN_VGATYPE_EFI_LFB     0x70 + +	union { +		struct { +			/* Font height, in pixels. */ +			uint16_t font_height; +			/* Cursor location (column, row). */ +			uint16_t cursor_x, cursor_y; +			/* Number of rows and columns (dimensions in characters). */ +			uint16_t rows, columns; +		} text_mode_3; + +		struct { +			/* Width and height, in pixels. */ +			uint16_t width, height; +			/* Bytes per scan line. */ +			uint16_t bytes_per_line; +			/* Bits per pixel. */ +			uint16_t bits_per_pixel; +			/* LFB physical address, and size (in units of 64kB). 
*/ +			uint32_t lfb_base; +			uint32_t lfb_size; +			/* RGB mask offsets and sizes, as defined by VBE 1.2+ */ +			uint8_t  red_pos, red_size; +			uint8_t  green_pos, green_size; +			uint8_t  blue_pos, blue_size; +			uint8_t  rsvd_pos, rsvd_size; + +			/* VESA capabilities (offset 0xa, VESA command 0x4f00). */ +			uint32_t gbl_caps; +			/* Mode attributes (offset 0x0, VESA command 0x4f01). */ +			uint16_t mode_attrs; +		} vesa_lfb; +	} u; +}; +  /* These flags are passed in the 'flags' field of start_info_t. */  #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */  #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */ +#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */  typedef uint64_t cpumap_t; @@ -461,6 +497,29 @@ typedef uint8_t xen_domain_handle_t[16];  #define __mk_unsigned_long(x) x ## UL  #define mk_unsigned_long(x) __mk_unsigned_long(x) +#define TMEM_SPEC_VERSION 1 + +struct tmem_op { +	uint32_t cmd; +	int32_t pool_id; +	union { +		struct {  /* for cmd == TMEM_NEW_POOL */ +			uint64_t uuid[2]; +			uint32_t flags; +		} new; +		struct { +			uint64_t oid[3]; +			uint32_t index; +			uint32_t tmem_offset; +			uint32_t pfn_offset; +			uint32_t len; +			GUEST_HANDLE(void) gmfn; /* guest machine page frame */ +		} gen; +	} u; +}; + +DEFINE_GUEST_HANDLE(u64); +  #else /* __ASSEMBLY__ */  /* In assembly code we cannot use C numeric constant suffixes. 
*/ diff --git a/include/xen/interface/xencomm.h b/include/xen/interface/xencomm.h deleted file mode 100644 index ac45e0712af..00000000000 --- a/include/xen/interface/xencomm.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Copyright (C) IBM Corp. 2006 - */ - -#ifndef _XEN_XENCOMM_H_ -#define _XEN_XENCOMM_H_ - -/* A xencomm descriptor is a scatter/gather list containing physical - * addresses corresponding to a virtually contiguous memory area. The - * hypervisor translates these physical addresses to machine addresses to copy - * to and from the virtually contiguous area. 
- */ - -#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ -#define XENCOMM_INVALID (~0UL) - -struct xencomm_desc { -    uint32_t magic; -    uint32_t nr_addrs; /* the number of entries in address[] */ -    uint64_t address[0]; -}; - -#endif /* _XEN_XENCOMM_H_ */ diff --git a/include/xen/page.h b/include/xen/page.h index eaf85fab126..12765b6f951 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1 +1,18 @@ +#ifndef _XEN_PAGE_H +#define _XEN_PAGE_H +  #include <asm/xen/page.h> + +struct xen_memory_region { +	phys_addr_t start; +	phys_addr_t size; +}; + +#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */ + +extern __initdata +struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS]; + +extern unsigned long xen_released_pages; + +#endif	/* _XEN_PAGE_H */ diff --git a/include/xen/platform_pci.h b/include/xen/platform_pci.h index a785a3b0c8c..5c52b558391 100644 --- a/include/xen/platform_pci.h +++ b/include/xen/platform_pci.h @@ -29,8 +29,7 @@  static inline int xen_must_unplug_nics(void) {  #if (defined(CONFIG_XEN_NETDEV_FRONTEND) || \  		defined(CONFIG_XEN_NETDEV_FRONTEND_MODULE)) && \ -		(defined(CONFIG_XEN_PLATFORM_PCI) || \ -		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) +		defined(CONFIG_XEN_PVHVM)          return 1;  #else          return 0; @@ -40,14 +39,34 @@ static inline int xen_must_unplug_nics(void) {  static inline int xen_must_unplug_disks(void) {  #if (defined(CONFIG_XEN_BLKDEV_FRONTEND) || \  		defined(CONFIG_XEN_BLKDEV_FRONTEND_MODULE)) && \ -		(defined(CONFIG_XEN_PLATFORM_PCI) || \ -		 defined(CONFIG_XEN_PLATFORM_PCI_MODULE)) +		defined(CONFIG_XEN_PVHVM)          return 1;  #else          return 0;  #endif  } -extern int xen_platform_pci_unplug; - +#if defined(CONFIG_XEN_PVHVM) +extern bool xen_has_pv_devices(void); +extern bool xen_has_pv_disk_devices(void); +extern bool xen_has_pv_nic_devices(void); +extern bool xen_has_pv_and_legacy_disk_devices(void); +#else +static inline bool xen_has_pv_devices(void) +{ +	return 
IS_ENABLED(CONFIG_XEN); +} +static inline bool xen_has_pv_disk_devices(void) +{ +	return IS_ENABLED(CONFIG_XEN); +} +static inline bool xen_has_pv_nic_devices(void) +{ +	return IS_ENABLED(CONFIG_XEN); +} +static inline bool xen_has_pv_and_legacy_disk_devices(void) +{ +	return false; +} +#endif  #endif /* _XEN_PLATFORM_PCI_H */ diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index 2ea2fdc79c1..8b2eb93ae8b 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -1,17 +1,20 @@  #ifndef __LINUX_SWIOTLB_XEN_H  #define __LINUX_SWIOTLB_XEN_H +#include <linux/dma-direction.h>  #include <linux/swiotlb.h> -extern void xen_swiotlb_init(int verbose); +extern int xen_swiotlb_init(int verbose, bool early);  extern void  *xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, -			    dma_addr_t *dma_handle, gfp_t flags); +			    dma_addr_t *dma_handle, gfp_t flags, +			    struct dma_attrs *attrs);  extern void  xen_swiotlb_free_coherent(struct device *hwdev, size_t size, -			  void *vaddr, dma_addr_t dma_handle); +			  void *vaddr, dma_addr_t dma_handle, +			  struct dma_attrs *attrs);  extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,  				       unsigned long offset, size_t size, @@ -21,15 +24,6 @@ extern dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,  extern void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,  				   size_t size, enum dma_data_direction dir,  				   struct dma_attrs *attrs); -/* -extern int -xen_swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, -		   enum dma_data_direction dir); - -extern void -xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, -		     enum dma_data_direction dir); -*/  extern int  xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl,  			 int nelems, enum dma_data_direction dir, @@ -62,4 +56,6 @@ xen_swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t 
dma_addr);  extern int  xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); +extern int +xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask);  #endif /* __LINUX_SWIOTLB_XEN_H */ diff --git a/include/xen/tmem.h b/include/xen/tmem.h new file mode 100644 index 00000000000..3930a90045f --- /dev/null +++ b/include/xen/tmem.h @@ -0,0 +1,17 @@ +#ifndef _XEN_TMEM_H +#define _XEN_TMEM_H + +#include <linux/types.h> + +#ifdef CONFIG_XEN_TMEM_MODULE +#define tmem_enabled true +#else +/* defined in drivers/xen/tmem.c */ +extern bool tmem_enabled; +#endif + +#ifdef CONFIG_XEN_SELFBALLOONING +extern int xen_selfballoon_init(bool, bool); +#endif + +#endif /* _XEN_TMEM_H */ diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 98b92154a26..0b3149ed7ea 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -2,30 +2,37 @@  #define INCLUDE_XEN_OPS_H  #include <linux/percpu.h> +#include <linux/notifier.h> +#include <asm/xen/interface.h>  DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); -void xen_pre_suspend(void); -void xen_post_suspend(int suspend_cancelled); -void xen_hvm_post_suspend(int suspend_cancelled); - -void xen_mm_pin_all(void); -void xen_mm_unpin_all(void); +void xen_arch_pre_suspend(void); +void xen_arch_post_suspend(int suspend_cancelled);  void xen_timer_resume(void);  void xen_arch_resume(void); +void xen_resume_notifier_register(struct notifier_block *nb); +void xen_resume_notifier_unregister(struct notifier_block *nb); +  int xen_setup_shutdown_event(void);  extern unsigned long *xen_contiguous_bitmap; -int xen_create_contiguous_region(unsigned long vstart, unsigned int order, -				unsigned int address_bits); +int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, +				unsigned int address_bits, +				dma_addr_t *dma_handle); -void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order); +void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order); +struct vm_area_struct;  int 
xen_remap_domain_mfn_range(struct vm_area_struct *vma,  			       unsigned long addr, -			       unsigned long mfn, int nr, -			       pgprot_t prot, unsigned domid); +			       xen_pfn_t mfn, int nr, +			       pgprot_t prot, unsigned domid, +			       struct page **pages); +int xen_unmap_domain_mfn_range(struct vm_area_struct *vma, +			       int numpgs, struct page **pages); +bool xen_running_on_version_or_later(unsigned int major, unsigned int minor);  #endif /* INCLUDE_XEN_OPS_H */ diff --git a/include/xen/xen.h b/include/xen/xen.h index a16402418d3..0c0e3ef4c45 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -23,10 +23,24 @@ extern enum xen_domain_type xen_domain_type;  #include <xen/interface/xen.h>  #include <asm/xen/hypervisor.h> -#define xen_initial_domain()	(xen_pv_domain() && \ -				 xen_start_info->flags & SIF_INITDOMAIN) +#define xen_initial_domain()	(xen_domain() && \ +				 xen_start_info && xen_start_info->flags & SIF_INITDOMAIN)  #else  /* !CONFIG_XEN_DOM0 */  #define xen_initial_domain()	(0)  #endif	/* CONFIG_XEN_DOM0 */ +#ifdef CONFIG_XEN_PVH +/* This functionality exists only for x86. The XEN_PVHVM support exists + * only in x86 world - hence on ARM it will be always disabled. + * N.B. ARM guests are neither PV nor HVM nor PVHVM. + * It's a bit like PVH but is different also (it's further towards the H + * end of the spectrum than even PVH). 
+ */ +#include <xen/features.h> +#define xen_pvh_domain() (xen_pv_domain() && \ +			  xen_feature(XENFEAT_auto_translated_physmap) && \ +			  xen_have_vector_callback) +#else +#define xen_pvh_domain()	(0) +#endif  #endif	/* _XEN_XEN_H */ diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h index 43e2d7d3397..0324c6d340c 100644 --- a/include/xen/xenbus.h +++ b/include/xen/xenbus.h @@ -37,6 +37,7 @@  #include <linux/device.h>  #include <linux/notifier.h>  #include <linux/mutex.h> +#include <linux/export.h>  #include <linux/completion.h>  #include <linux/init.h>  #include <linux/slab.h> @@ -69,6 +70,7 @@ struct xenbus_device {  	struct device dev;  	enum xenbus_state state;  	struct completion down; +	struct work_struct work;  };  static inline struct xenbus_device *to_xenbus_device(struct device *dev) @@ -84,47 +86,34 @@ struct xenbus_device_id  /* A xenbus driver. */  struct xenbus_driver { -	char *name; -	struct module *owner;  	const struct xenbus_device_id *ids;  	int (*probe)(struct xenbus_device *dev,  		     const struct xenbus_device_id *id);  	void (*otherend_changed)(struct xenbus_device *dev,  				 enum xenbus_state backend_state);  	int (*remove)(struct xenbus_device *dev); -	int (*suspend)(struct xenbus_device *dev, pm_message_t state); +	int (*suspend)(struct xenbus_device *dev);  	int (*resume)(struct xenbus_device *dev); -	int (*uevent)(struct xenbus_device *, char **, int, char *, int); +	int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);  	struct device_driver driver;  	int (*read_otherend_details)(struct xenbus_device *dev);  	int (*is_ready)(struct xenbus_device *dev);  }; -static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv) -{ -	return container_of(drv, struct xenbus_driver, driver); +#define DEFINE_XENBUS_DRIVER(var, drvname, methods...)		
\ +struct xenbus_driver var ## _driver = {				\ +	.driver.name = drvname + 0 ?: var ## _ids->devicetype,	\ +	.driver.owner = THIS_MODULE,				\ +	.ids = var ## _ids, ## methods				\  } -int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, -					    struct module *owner, -					    const char *mod_name); - -static inline int __must_check -xenbus_register_frontend(struct xenbus_driver *drv) +static inline struct xenbus_driver *to_xenbus_driver(struct device_driver *drv)  { -	WARN_ON(drv->owner != THIS_MODULE); -	return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); +	return container_of(drv, struct xenbus_driver, driver);  } -int __must_check __xenbus_register_backend(struct xenbus_driver *drv, -					   struct module *owner, -					   const char *mod_name); -static inline int __must_check -xenbus_register_backend(struct xenbus_driver *drv) -{ -	WARN_ON(drv->owner != THIS_MODULE); -	return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); -} +int __must_check xenbus_register_frontend(struct xenbus_driver *); +int __must_check xenbus_register_backend(struct xenbus_driver *);  void xenbus_unregister_driver(struct xenbus_driver *drv); @@ -151,14 +140,14 @@ int xenbus_transaction_start(struct xenbus_transaction *t);  int xenbus_transaction_end(struct xenbus_transaction t, int abort);  /* Single read and scanf: returns -errno or num scanned if > 0. */ +__scanf(4, 5)  int xenbus_scanf(struct xenbus_transaction t, -		 const char *dir, const char *node, const char *fmt, ...) -	__attribute__((format(scanf, 4, 5))); +		 const char *dir, const char *node, const char *fmt, ...);  /* Single printf and write: returns -errno or 0. */ +__printf(4, 5)  int xenbus_printf(struct xenbus_transaction t, -		  const char *dir, const char *node, const char *fmt, ...) 
-	__attribute__((format(printf, 4, 5))); +		  const char *dir, const char *node, const char *fmt, ...);  /* Generic read function: NULL-terminated triples of name,   * sprintf-style type string, and pointer. Returns 0 or errno.*/ @@ -200,11 +189,11 @@ int xenbus_watch_path(struct xenbus_device *dev, const char *path,  		      struct xenbus_watch *watch,  		      void (*callback)(struct xenbus_watch *,  				       const char **, unsigned int)); +__printf(4, 5)  int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,  			 void (*callback)(struct xenbus_watch *,  					  const char **, unsigned int), -			 const char *pathfmt, ...) -	__attribute__ ((format (printf, 4, 5))); +			 const char *pathfmt, ...);  int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);  int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); @@ -218,12 +207,13 @@ int xenbus_unmap_ring(struct xenbus_device *dev,  		      grant_handle_t handle, void *vaddr);  int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);  int xenbus_free_evtchn(struct xenbus_device *dev, int port);  enum xenbus_state xenbus_read_driver_state(const char *path); +__printf(3, 4)  void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); +__printf(3, 4)  void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...);  const char *xenbus_strstate(enum xenbus_state state); diff --git a/include/xen/privcmd.h b/include/xen/xenbus_dev.h index b42cdfd92fe..bbee8c6a349 100644 --- a/include/xen/privcmd.h +++ b/include/xen/xenbus_dev.h @@ -1,9 +1,9 @@  /****************************************************************************** - * privcmd.h + * evtchn.h   * - * Interface to /proc/xen/privcmd. + * Interface to /dev/xen/xenbus_backend.   
* - * Copyright (c) 2003-2005, K A Fraser + * Copyright (c) 2011 Bastian Blank <waldi@debian.org>   *   * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License version 2 @@ -30,51 +30,15 @@   * IN THE SOFTWARE.   */ -#ifndef __LINUX_PUBLIC_PRIVCMD_H__ -#define __LINUX_PUBLIC_PRIVCMD_H__ +#ifndef __LINUX_XEN_XENBUS_DEV_H__ +#define __LINUX_XEN_XENBUS_DEV_H__ -#include <linux/types.h> +#include <linux/ioctl.h> -typedef unsigned long xen_pfn_t; +#define IOCTL_XENBUS_BACKEND_EVTCHN			\ +	_IOC(_IOC_NONE, 'B', 0, 0) -#ifndef __user -#define __user -#endif +#define IOCTL_XENBUS_BACKEND_SETUP			\ +	_IOC(_IOC_NONE, 'B', 1, 0) -struct privcmd_hypercall { -	__u64 op; -	__u64 arg[5]; -}; - -struct privcmd_mmap_entry { -	__u64 va; -	__u64 mfn; -	__u64 npages; -}; - -struct privcmd_mmap { -	int num; -	domid_t dom; /* target domain */ -	struct privcmd_mmap_entry __user *entry; -}; - -struct privcmd_mmapbatch { -	int num;     /* number of pages to populate */ -	domid_t dom; /* target domain */ -	__u64 addr;  /* virtual address */ -	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */ -}; - -/* - * @cmd: IOCTL_PRIVCMD_HYPERCALL - * @arg: &privcmd_hypercall_t - * Return: Value returned from execution of the specified hypercall. 
- */ -#define IOCTL_PRIVCMD_HYPERCALL					\ -	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall)) -#define IOCTL_PRIVCMD_MMAP					\ -	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap)) -#define IOCTL_PRIVCMD_MMAPBATCH					\ -	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) - -#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ +#endif /* __LINUX_XEN_XENBUS_DEV_H__ */ diff --git a/include/xen/xencomm.h b/include/xen/xencomm.h deleted file mode 100644 index e43b039be11..00000000000 --- a/include/xen/xencomm.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA - * - * Copyright (C) IBM Corp. 2006 - * - * Authors: Hollis Blanchard <hollisb@us.ibm.com> - *          Jerone Young <jyoung5@us.ibm.com> - */ - -#ifndef _LINUX_XENCOMM_H_ -#define _LINUX_XENCOMM_H_ - -#include <xen/interface/xencomm.h> - -#define XENCOMM_MINI_ADDRS 3 -struct xencomm_mini { -	struct xencomm_desc _desc; -	uint64_t address[XENCOMM_MINI_ADDRS]; -}; - -/* To avoid additionnal virt to phys conversion, an opaque structure is -   presented.  
*/ -struct xencomm_handle; - -extern void xencomm_free(struct xencomm_handle *desc); -extern struct xencomm_handle *xencomm_map(void *ptr, unsigned long bytes); -extern struct xencomm_handle *__xencomm_map_no_alloc(void *ptr, -			unsigned long bytes,  struct xencomm_mini *xc_area); - -#if 0 -#define XENCOMM_MINI_ALIGNED(xc_desc, n)				\ -	struct xencomm_mini xc_desc ## _base[(n)]			\ -	__attribute__((__aligned__(sizeof(struct xencomm_mini))));	\ -	struct xencomm_mini *xc_desc = &xc_desc ## _base[0]; -#else -/* - * gcc bug workaround: - * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16660 - * gcc doesn't handle properly stack variable with - * __attribute__((__align__(sizeof(struct xencomm_mini)))) - */ -#define XENCOMM_MINI_ALIGNED(xc_desc, n)				\ -	unsigned char xc_desc ## _base[((n) + 1 ) *			\ -				       sizeof(struct xencomm_mini)];	\ -	struct xencomm_mini *xc_desc = (struct xencomm_mini *)		\ -		((unsigned long)xc_desc ## _base +			\ -		 (sizeof(struct xencomm_mini) -				\ -		  ((unsigned long)xc_desc ## _base) %			\ -		  sizeof(struct xencomm_mini))); -#endif -#define xencomm_map_no_alloc(ptr, bytes)			\ -	({ XENCOMM_MINI_ALIGNED(xc_desc, 1);			\ -		__xencomm_map_no_alloc(ptr, bytes, xc_desc); }) - -/* provided by architecture code: */ -extern unsigned long xencomm_vtop(unsigned long vaddr); - -static inline void *xencomm_pa(void *ptr) -{ -	return (void *)xencomm_vtop((unsigned long)ptr); -} - -#define xen_guest_handle(hnd)  ((hnd).p) - -#endif /* _LINUX_XENCOMM_H_ */  | 
