aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-04-25 12:32:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-25 12:32:10 -0700
commit4b7227ca321ccf447cdc04538687c895db8b77f5 (patch)
tree72712127fc56aa2579e8a1508998bcabf6bd6c60
parent5dae61b80564a5583ff4b56e357bdbc733fddb76 (diff)
parent1775826ceec51187aa868406585799b7e76ffa7d (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-xen-next: (52 commits) xen: add balloon driver xen: allow compilation with non-flat memory xen: fold xen_sysexit into xen_iret xen: allow set_pte_at on init_mm to be lockless xen: disable preemption during tlb flush xen pvfb: Para-virtual framebuffer, keyboard and pointer driver xen: Add compatibility aliases for frontend drivers xen: Module autoprobing support for frontend drivers xen blkfront: Delay wait for block devices until after the disk is added xen/blkfront: use bdget_disk xen: Make xen-blkfront write its protocol ABI to xenstore xen: import arch generic part of xencomm xen: make grant table arch portable xen: replace callers of alloc_vm_area()/free_vm_area() with xen_ prefixed one xen: make include/xen/page.h portable moving those definitions under asm dir xen: add resend_irq_on_evtchn() definition into events.c Xen: make events.c portable for ia64/xen support xen: move events.c to drivers/xen for IA64/Xen support xen: move features.c from arch/x86/xen/features.c to drivers/xen xen: add missing definitions in include/xen/interface/vcpu.h which ia64/xen needs ...
-rw-r--r--arch/x86/kernel/entry_32.S12
-rw-r--r--arch/x86/kernel/paravirt.c12
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/smpboot.c4
-rw-r--r--arch/x86/kernel/vmi_32.c22
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c4
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/init_32.c8
-rw-r--r--arch/x86/mm/ioremap.c2
-rw-r--r--arch/x86/mm/pageattr.c4
-rw-r--r--arch/x86/mm/pgtable.c276
-rw-r--r--arch/x86/mm/pgtable_32.c204
-rw-r--r--arch/x86/xen/Kconfig2
-rw-r--r--arch/x86/xen/Makefile4
-rw-r--r--arch/x86/xen/enlighten.c54
-rw-r--r--arch/x86/xen/grant-table.c91
-rw-r--r--arch/x86/xen/mmu.c143
-rw-r--r--arch/x86/xen/setup.c21
-rw-r--r--arch/x86/xen/smp.c20
-rw-r--r--arch/x86/xen/xen-asm.S42
-rw-r--r--arch/x86/xen/xen-ops.h8
-rw-r--r--drivers/Kconfig2
-rw-r--r--drivers/block/xen-blkfront.c23
-rw-r--r--drivers/input/Kconfig9
-rw-r--r--drivers/input/Makefile2
-rw-r--r--drivers/input/xen-kbdfront.c340
-rw-r--r--drivers/net/xen-netfront.c2
-rw-r--r--drivers/video/Kconfig14
-rw-r--r--drivers/video/Makefile1
-rw-r--r--drivers/video/xen-fbfront.c550
-rw-r--r--drivers/xen/Kconfig19
-rw-r--r--drivers/xen/Makefile4
-rw-r--r--drivers/xen/balloon.c712
-rw-r--r--drivers/xen/events.c (renamed from arch/x86/xen/events.c)121
-rw-r--r--drivers/xen/features.c (renamed from arch/x86/xen/features.c)0
-rw-r--r--drivers/xen/grant-table.c37
-rw-r--r--drivers/xen/xenbus/xenbus_client.c6
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c32
-rw-r--r--drivers/xen/xencomm.c232
-rw-r--r--include/asm-x86/paravirt.h43
-rw-r--r--include/asm-x86/pgalloc.h111
-rw-r--r--include/asm-x86/pgalloc_32.h95
-rw-r--r--include/asm-x86/pgalloc_64.h133
-rw-r--r--include/asm-x86/pgtable.h54
-rw-r--r--include/asm-x86/pgtable_32.h18
-rw-r--r--include/asm-x86/pgtable_64.h2
-rw-r--r--include/asm-x86/xen/events.h22
-rw-r--r--include/asm-x86/xen/grant_table.h7
-rw-r--r--include/asm-x86/xen/hypercall.h6
-rw-r--r--include/asm-x86/xen/interface.h28
-rw-r--r--include/asm-x86/xen/page.h168
-rw-r--r--include/xen/balloon.h61
-rw-r--r--include/xen/events.h9
-rw-r--r--include/xen/grant_table.h7
-rw-r--r--include/xen/interface/callback.h102
-rw-r--r--include/xen/interface/grant_table.h11
-rw-r--r--include/xen/interface/io/fbif.h124
-rw-r--r--include/xen/interface/io/kbdif.h114
-rw-r--r--include/xen/interface/io/protocols.h21
-rw-r--r--include/xen/interface/memory.h12
-rw-r--r--include/xen/interface/vcpu.h5
-rw-r--r--include/xen/interface/xen.h22
-rw-r--r--include/xen/interface/xencomm.h41
-rw-r--r--include/xen/page.h181
-rw-r--r--include/xen/xen-ops.h8
-rw-r--r--include/xen/xenbus.h1
-rw-r--r--include/xen/xencomm.h77
67 files changed, 3607 insertions, 921 deletions
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc30..2a609dc3271 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+ entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+ RING0_INT_FRAME
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
cmpl $xen_iret_end_crit,%eax
jae 1f
- call xen_iret_crit_fixup
+ jmp xen_iret_crit_fixup
+ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
jmp ret_from_intr
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d135..74f0c5ea2a0 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
.flush_tlb_single = native_flush_tlb_single,
.flush_tlb_others = native_flush_tlb_others,
- .alloc_pt = paravirt_nop,
- .alloc_pd = paravirt_nop,
- .alloc_pd_clone = paravirt_nop,
- .release_pt = paravirt_nop,
- .release_pd = paravirt_nop,
+ .alloc_pte = paravirt_nop,
+ .alloc_pmd = paravirt_nop,
+ .alloc_pmd_clone = paravirt_nop,
+ .alloc_pud = paravirt_nop,
+ .release_pte = paravirt_nop,
+ .release_pmd = paravirt_nop,
+ .release_pud = paravirt_nop,
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac11..1791a751a77 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
+#include <asm/pgtable.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
@@ -15,7 +16,6 @@
# include <linux/dmi.h>
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
#else
# include <asm/iommu.h>
#endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
- memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ade371f9663..eef79e84145 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
#ifdef CONFIG_X86_32
/* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
#endif
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bc..956f38927aa 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
* pdes need to be zeroed.
*/
if (type & VMI_PAGE_CLONE)
- limit = USER_PTRS_PER_PGD;
+ limit = KERNEL_PGD_BOUNDARY;
for (i = 0; i < limit; i++)
BUG_ON(ptr[i]);
}
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
{
vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
if (vmi_ops.allocate_page) {
- pv_mmu_ops.alloc_pt = vmi_allocate_pt;
- pv_mmu_ops.alloc_pd = vmi_allocate_pd;
- pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+ pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+ pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+ pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
}
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
if (vmi_ops.release_page) {
- pv_mmu_ops.release_pt = vmi_release_pt;
- pv_mmu_ops.release_pd = vmi_release_pd;
+ pv_mmu_ops.release_pte = vmi_release_pte;
+ pv_mmu_ops.release_pmd = vmi_release_pmd;
}
/* Set linear is needed in all cases */
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index d05722121d2..6e2c4efce0e 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu)
hijack_source.idt.Offset, stack_start.sp));
/* init lowmem identity mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
if (quad_boot) {
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e..b7b3e4c7cfc 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o
+ pat.o pgtable.o
obj-$(CONFIG_X86_32) += pgtable_32.o
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd..08aa1878fad 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
- paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+ paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
}
- paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+ paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
pte_clear(NULL, va, pte);
}
- paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+ paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
}
void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +457,7 @@ void zap_low_mappings(void)
* Note that "pgd_clear()" doesn't do it for
* us, because pgd_clear() is a no-op on i386.
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+ for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
#ifdef CONFIG_X86_PAE
set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
#else
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24..36a3f7ded62 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -407,7 +407,7 @@ void __init early_ioremap_clear(void)
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
pmd_clear(pmd);
- paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+ paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
__flush_tlb_all();
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c29ebd03725..bd5e05c654d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
goto out_unlock;
pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
- paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
+ paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
#ifdef CONFIG_X86_64
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 00000000000..50159764f69
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,276 @@
+#include <linux/mm.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+ return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+ pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+#else
+ pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
+ if (pte)
+ pgtable_page_ctor(pte);
+ return pte;
+}
+
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+ pgtable_page_dtor(pte);
+ paravirt_release_pte(page_to_pfn(pte));
+ tlb_remove_page(tlb, pte);
+}
+
+#if PAGETABLE_LEVELS > 2
+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+ paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+ tlb_remove_page(tlb, virt_to_page(pmd));
+}
+
+#if PAGETABLE_LEVELS > 3
+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+{
+ paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
+ tlb_remove_page(tlb, virt_to_page(pud));
+}
+#endif /* PAGETABLE_LEVELS > 3 */
+#endif /* PAGETABLE_LEVELS > 2 */
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+ struct page *page = virt_to_page(pgd);
+
+ list_del(&page->lru);
+}
+
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+
+static void pgd_ctor(void *p)
+{
+ pgd_t *pgd = p;
+ unsigned long flags;
+
+ /* Clear usermode parts of PGD */
+ memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
+
+ spin_lock_irqsave(&pgd_lock, flags);
+
+ /* If the pgd points to a shared pagetable level (either the
+ ptes in non-PAE, or shared PMD in PAE), then just copy the
+ references from swapper_pg_dir. */
+ if (PAGETABLE_LEVELS == 2 ||
+ (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
+ PAGETABLE_LEVELS == 4) {
+ clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+ paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+ }
+
+ /* list required to sync kernel mapping updates */
+ if (!SHARED_KERNEL_PMD)
+ pgd_list_add(pgd);
+
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void pgd_dtor(void *pgd)
+{
+ unsigned long flags; /* can be called from interrupt context */
+
+ if (SHARED_KERNEL_PMD)
+ return;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_del(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * -- wli
+ */
+
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+{
+ int i;
+
+ for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+ pgd_t pgd = pgdp[i];
+
+ if (pgd_val(pgd) != 0) {
+ pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+ pgdp[i] = native_make_pgd(0);
+
+ paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
+ pmd_free(mm, pmd);
+ }
+ }
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update. Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+ pud_t *pud;
+ unsigned long addr;
+ int i;
+
+ pud = pud_offset(pgd, 0);
+ for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+ i++, pud++, addr += PUD_SIZE) {
+ pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+ if (!pmd) {
+ pgd_mop_up_pmds(mm, pgd);
+ return 0;
+ }
+
+ if (i >= KERNEL_PGD_BOUNDARY)
+ memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+ sizeof(pmd_t) * PTRS_PER_PMD);
+
+ pud_populate(mm, pud, pmd);
+ }
+
+ return 1;
+}
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+ paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+ /* Note: almost everything apart from _PAGE_PRESENT is
+ reserved at the pmd (PDPT) level. */
+ set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+ /*
+ * According to Intel App note "TLBs, Paging-Structure Caches,
+ * and Their Invalidation", April 2007, document 317080-001,
+ * section 8.1: in PAE mode we explicitly have to flush the
+ * TLB via cr3 if the top-level pgd is changed...
+ */
+ if (mm == current->active_mm)
+ write_cr3(read_cr3());
+}
+#else /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+ return 1;
+}
+
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
+{
+}
+#endif /* CONFIG_X86_PAE */
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+ /* so that alloc_pmd can use it */
+ mm->pgd = pgd;
+ if (pgd)
+ pgd_ctor(pgd);
+
+ if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
+ pgd = NULL;
+ }
+
+ return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ pgd_mop_up_pmds(mm, pgd);
+ pgd_dtor(pgd);
+ free_page((unsigned long)pgd);
+}
+
+int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty)
+{
+ int changed = !pte_same(*ptep, entry);
+
+ if (changed && dirty) {
+ *ptep = entry;
+ pte_update_defer(vma->vm_mm, address, ptep);
+ flush_tlb_page(vma, address);
+ }
+
+ return changed;
+}
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ int ret = 0;
+
+ if (pte_young(*ptep))
+ ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+ &ptep->pte);
+
+ if (ret)
+ pte_update(vma->vm_mm, addr, ptep);
+