aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/xen/setup.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/xen/setup.c')
-rw-r--r--arch/x86/xen/setup.c172
1 files changed, 137 insertions, 35 deletions
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a4790bf22c5..2e555163c2f 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -17,6 +17,7 @@
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
+#include <asm/numa.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
@@ -32,6 +33,9 @@
/* These are code, but not functions. Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
+#ifdef CONFIG_X86_64
+extern asmlinkage void nmi(void);
+#endif
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);
@@ -78,9 +82,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size)
memblock_reserve(start, size);
xen_max_p2m_pfn = PFN_DOWN(start + size);
+ for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) {
+ unsigned long mfn = pfn_to_mfn(pfn);
+
+ if (WARN_ONCE(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn))
+ continue;
+ WARN_ONCE(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n",
+ pfn, mfn);
- for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ }
}
static unsigned long __init xen_do_chunk(unsigned long start,
@@ -157,25 +168,24 @@ static unsigned long __init xen_populate_chunk(
unsigned long dest_pfn;
for (i = 0, entry = list; i < map_size; i++, entry++) {
- unsigned long credits = credits_left;
unsigned long s_pfn;
unsigned long e_pfn;
unsigned long pfns;
long capacity;
- if (credits <= 0)
+ if (credits_left <= 0)
break;
if (entry->type != E820_RAM)
continue;
- e_pfn = PFN_UP(entry->addr + entry->size);
+ e_pfn = PFN_DOWN(entry->addr + entry->size);
/* We only care about E820 after the xen_start_info->nr_pages */
if (e_pfn <= max_pfn)
continue;
- s_pfn = PFN_DOWN(entry->addr);
+ s_pfn = PFN_UP(entry->addr);
/* If the E820 falls within the nr_pages, we want to start
* at the nr_pages PFN.
* If that would mean going past the E820 entry, skip it
@@ -184,23 +194,19 @@ static unsigned long __init xen_populate_chunk(
capacity = e_pfn - max_pfn;
dest_pfn = max_pfn;
} else {
- /* last_pfn MUST be within E820_RAM regions */
- if (*last_pfn && e_pfn >= *last_pfn)
- s_pfn = *last_pfn;
capacity = e_pfn - s_pfn;
dest_pfn = s_pfn;
}
- /* If we had filled this E820_RAM entry, go to the next one. */
- if (capacity <= 0)
- continue;
- if (credits > capacity)
- credits = capacity;
+ if (credits_left < capacity)
+ capacity = credits_left;
- pfns = xen_do_chunk(dest_pfn, dest_pfn + credits, false);
+ pfns = xen_do_chunk(dest_pfn, dest_pfn + capacity, false);
done += pfns;
- credits_left -= pfns;
*last_pfn = (dest_pfn + pfns);
+ if (pfns < capacity)
+ break;
+ credits_left -= pfns;
}
return done;
}
@@ -212,13 +218,19 @@ static void __init xen_set_identity_and_release_chunk(
unsigned long pfn;
/*
- * If the PFNs are currently mapped, the VA mapping also needs
- * to be updated to be 1:1.
+ * If the PFNs are currently mapped, clear the mappings
+ * (except for the ISA region which must be 1:1 mapped) to
+ * release the refcounts (in Xen) on the original frames.
*/
- for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
+ for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
+ pte_t pte = __pte_ma(0);
+
+ if (pfn < PFN_UP(ISA_END_ADDRESS))
+ pte = mfn_pte(pfn, PAGE_KERNEL_IO);
+
(void)HYPERVISOR_update_va_mapping(
- (unsigned long)__va(pfn << PAGE_SHIFT),
- mfn_pte(pfn, PAGE_KERNEL_IO), 0);
+ (unsigned long)__va(pfn << PAGE_SHIFT), pte, 0);
+ }
if (start_pfn < nr_pages)
*released += xen_release_chunk(
@@ -310,6 +322,17 @@ static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
e820_add_region(start, end - start, type);
}
+void xen_ignore_unusable(struct e820entry *list, size_t map_size)
+{
+ struct e820entry *entry;
+ unsigned int i;
+
+ for (i = 0, entry = list; i < map_size; i++, entry++) {
+ if (entry->type == E820_UNUSABLE)
+ entry->type = E820_RAM;
+ }
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
@@ -350,6 +373,17 @@ char * __init xen_memory_setup(void)
}
BUG_ON(rc);
+ /*
+ * Xen won't allow a 1:1 mapping to be created to UNUSABLE
+ * regions, so if we're using the machine memory map leave the
+ * region as RAM as it is in the pseudo-physical map.
+ *
+ * UNUSABLE regions in domUs are not handled and will need
+ * a patch in the future.
+ */
+ if (xen_initial_domain())
+ xen_ignore_unusable(map, memmap.nr_entries);
+
/* Make sure the Xen-supplied memory map is well-ordered. */
sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
@@ -417,6 +451,15 @@ char * __init xen_memory_setup(void)
}
/*
+ * Set the rest as identity mapped, in case PCI BARs are
+ * located here.
+ *
+ * PFNs above MAX_P2M_PFN are considered identity mapped as
+ * well.
+ */
+ set_phys_range_identity(map[i-1].addr / PAGE_SIZE, ~0ul);
+
+ /*
* In domU, the ISA region is normal, usable memory, but we
* reserve ISA memory anyway because too many things poke
* about in there.
@@ -429,6 +472,24 @@ char * __init xen_memory_setup(void)
* - mfn_list
* - xen_start_info
* See comment above "struct start_info" in <xen/interface/xen.h>
+ * We tried to make the memblock_reserve more selective so
+ * that it would be clear what region is reserved. Sadly we ran
+ * into the problem wherein on a 64-bit hypervisor with a 32-bit
+ * initial domain, the pt_base has the cr3 value which is not
+ * necessarily where the pagetable starts! As Jan put it: "
+ * Actually, the adjustment turns out to be correct: The page
+ * tables for a 32-on-64 dom0 get allocated in the order "first L1",
+ * "first L2", "first L3", so the offset to the page table base is
+ * indeed 2. When reading xen/include/public/xen.h's comment
+ * very strictly, this is not a violation (since there nothing is said
+ * that the first thing in the page table space is pointed to by
+ * pt_base; I admit that this seems to be implied though, namely
+ * do I think that it is implied that the page table space is the
+ * range [pt_base, pt_base + nt_pt_frames), whereas that
+ * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
+ * which - without a priori knowledge - the kernel would have
+ * difficulty to figure out)." - so let's just fall back to the
+ * easy way and reserve the whole region.
*/
memblock_reserve(__pa(xen_start_info->mfn_list),
xen_start_info->pt_base - xen_start_info->mfn_list);
@@ -439,6 +500,35 @@ char * __init xen_memory_setup(void)
}
/*
+ * Machine specific memory setup for auto-translated guests.
+ */
+char * __init xen_auto_xlated_memory_setup(void)
+{
+ static struct e820entry map[E820MAX] __initdata;
+
+ struct xen_memory_map memmap;
+ int i;
+ int rc;
+
+ memmap.nr_entries = E820MAX;
+ set_xen_guest_handle(memmap.buffer, map);
+
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
+ if (rc < 0)
+ panic("No memory map (%d)\n", rc);
+
+ sanitize_e820_map(map, ARRAY_SIZE(map), &memmap.nr_entries);
+
+ for (i = 0; i < memmap.nr_entries; i++)
+ e820_add_region(map[i].addr, map[i].size, map[i].type);
+
+ memblock_reserve(__pa(xen_start_info->mfn_list),
+ xen_start_info->pt_base - xen_start_info->mfn_list);
+
+ return "Xen";
+}
+
+/*
* Set the bit indicating "nosegneg" library variants should be used.
* We only need to bother in pure 32-bit mode; compat 32-bit processes
* can have un-truncated segments, so wrapping around is allowed.
@@ -446,15 +536,22 @@ char * __init xen_memory_setup(void)
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
+ /*
+ * This could be called before selected_vdso32 is initialized, so
+ * just fiddle with both possible images. vdso_image_32_syscall
+ * can't be selected, since it only exists on 64-bit systems.
+ */
u32 *mask;
- mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
+ mask = vdso_image_32_int80.data +
+ vdso_image_32_int80.sym_VDSO32_NOTE_MASK;
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
- mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
+ mask = vdso_image_32_sysenter.data +
+ vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK;
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}
-static int __cpuinit register_callback(unsigned type, const void *func)
+static int register_callback(unsigned type, const void *func)
{
struct callback_register callback = {
.type = type,
@@ -465,7 +562,7 @@ static int __cpuinit register_callback(unsigned type, const void *func)
return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}
-void __cpuinit xen_enable_sysenter(void)
+void xen_enable_sysenter(void)
{
int ret;
unsigned sysenter_feature;
@@ -484,7 +581,7 @@ void __cpuinit xen_enable_sysenter(void)
setup_clear_cpu_cap(sysenter_feature);
}
-void __cpuinit xen_enable_syscall(void)
+void xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
int ret;
@@ -505,16 +602,13 @@ void __cpuinit xen_enable_syscall(void)
#endif /* CONFIG_X86_64 */
}
-void __init xen_arch_setup(void)
+void __init xen_pvmmu_arch_setup(void)
{
- xen_panic_handler_init();
-
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);
- if (!xen_feature(XENFEAT_auto_translated_physmap))
- HYPERVISOR_vm_assist(VMASST_CMD_enable,
- VMASST_TYPE_pae_extended_cr3);
+ HYPERVISOR_vm_assist(VMASST_CMD_enable,
+ VMASST_TYPE_pae_extended_cr3);
if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
@@ -522,6 +616,14 @@ void __init xen_arch_setup(void)
xen_enable_sysenter();
xen_enable_syscall();
+}
+
+/* This function is not called for HVM domains */
+void __init xen_arch_setup(void)
+{
+ xen_panic_handler_init();
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
+ xen_pvmmu_arch_setup();
#ifdef CONFIG_ACPI
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
@@ -535,11 +637,11 @@ void __init xen_arch_setup(void)
COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
/* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
- boot_cpu_data.hlt_works_ok = 1;
-#endif
disable_cpuidle();
disable_cpufreq();
- WARN_ON(set_pm_idle_to_default());
+ WARN_ON(xen_set_default_idle());
fiddle_vdso();
+#ifdef CONFIG_NUMA
+ numa_off = 1;
+#endif
}