diff options
Diffstat (limited to 'arch/x86/mm/pageattr.c')
| -rw-r--r-- | arch/x86/mm/pageattr.c | 702 |
1 files changed, 606 insertions, 96 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 532e7933d60..ae242a7c11c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -13,6 +13,7 @@ #include <linux/pfn.h> #include <linux/percpu.h> #include <linux/gfp.h> +#include <linux/pci.h> #include <asm/e820.h> #include <asm/processor.h> @@ -29,6 +30,7 @@ */ struct cpa_data { unsigned long *vaddr; + pgd_t *pgd; pgprot_t mask_set; pgprot_t mask_clr; int numpages; @@ -56,12 +58,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; void update_page_count(int level, unsigned long pages) { - unsigned long flags; - /* Protect against CPA */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); direct_pages_count[level] += pages; - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } static void split_page_count(int level) @@ -95,12 +95,12 @@ static inline void split_page_count(int level) { } static inline unsigned long highmap_start_pfn(void) { - return __pa(_text) >> PAGE_SHIFT; + return __pa_symbol(_text) >> PAGE_SHIFT; } static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -123,11 +123,11 @@ within(unsigned long addr, unsigned long start, unsigned long end) /** * clflush_cache_range - flush a cache range with clflush - * @addr: virtual start address + * @vaddr: virtual start address * @size: number of bytes to flush * - * clflush is an unordered instruction which needs fencing with mfence - * to avoid ordering issues. + * clflushopt is an unordered instruction which needs fencing with mfence or + * sfence to avoid ordering issues. */ void clflush_cache_range(void *vaddr, unsigned int size) { @@ -136,11 +136,11 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflush(vaddr); + clflushopt(vaddr); /* * Flush any possible final partial cacheline: */ - clflush(vend); + clflushopt(vend); mb(); } @@ -260,8 +260,10 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ - if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) +#ifdef CONFIG_PCI_BIOS + if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_NX; +#endif /* * The kernel text needs to be executable for obvious reasons @@ -275,8 +277,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * The .rodata section needs to be read-only. Using the pfn * catches all aliases. */ - if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, - __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) + if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, + __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) @@ -309,7 +311,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * these shared mappings are made of small page mappings. * Thus this don't enforce !RW mapping for small page kernel * text mapping logic will help Linux Xen parvirt guest boot - * aswell. + * as well. */ if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) pgprot_val(forbidden) |= _PAGE_RW; @@ -322,16 +324,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, } /* - * Lookup the page table entry for a virtual address. Return a pointer - * to the entry and the level of the mapping. - * - * Note: We return pud and pmd either when the entry is marked large - * or when the present bit is not set. Otherwise we would return a - * pointer to a nonexisting mapping. + * Lookup the page table entry for a virtual address in a specific pgd. + * Return a pointer to the entry and the level of the mapping. */ -pte_t *lookup_address(unsigned long address, unsigned int *level) +pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level) { - pgd_t *pgd = pgd_offset_k(address); pud_t *pud; pmd_t *pmd; @@ -360,8 +358,62 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) return pte_offset_kernel(pmd, address); } + +/* + * Lookup the page table entry for a virtual address. Return a pointer + * to the entry and the level of the mapping. + * + * Note: We return pud and pmd either when the entry is marked large + * or when the present bit is not set. Otherwise we would return a + * pointer to a nonexisting mapping. + */ +pte_t *lookup_address(unsigned long address, unsigned int *level) +{ + return lookup_address_in_pgd(pgd_offset_k(address), address, level); +} EXPORT_SYMBOL_GPL(lookup_address); +static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, + unsigned int *level) +{ + if (cpa->pgd) + return lookup_address_in_pgd(cpa->pgd + pgd_index(address), + address, level); + + return lookup_address(address, level); +} + +/* + * This is necessary because __pa() does not work on some + * kinds of memory, like vmalloc() or the alloc_remap() + * areas on 32-bit NUMA systems. The percpu areas can + * end up in this kind of memory, for instance. + * + * This could be optimized, but it is only intended to be + * used at inititalization time, and keeping it + * unoptimized should increase the testing coverage for + * the more obscure platforms. + */ +phys_addr_t slow_virt_to_phys(void *__virt_addr) +{ + unsigned long virt_addr = (unsigned long)__virt_addr; + phys_addr_t phys_addr; + unsigned long offset; + enum pg_level level; + unsigned long psize; + unsigned long pmask; + pte_t *pte; + + pte = lookup_address(virt_addr, &level); + BUG_ON(!pte); + psize = page_level_size(level); + pmask = page_level_mask(level); + offset = virt_addr & ~pmask; + phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + return (phys_addr | offset); +} +EXPORT_SYMBOL_GPL(slow_virt_to_phys); + /* * Set the new pmd in all the pgds we know about: */ @@ -391,35 +443,32 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; pte_t new_pte, old_pte, *tmp; - pgprot_t old_prot, new_prot; + pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; - unsigned int level; + enum pg_level level; if (cpa->force_split) return 1; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: */ - tmp = lookup_address(address, &level); + tmp = _lookup_address_cpa(cpa, address, &level); if (tmp != kpte) goto out_unlock; switch (level) { case PG_LEVEL_2M: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; - break; #ifdef CONFIG_X86_64 case PG_LEVEL_1G: - psize = PUD_PAGE_SIZE; - pmask = PUD_PAGE_MASK; - break; #endif + psize = page_level_size(level); + pmask = page_level_mask(level); + break; default: do_split = -EINVAL; goto out_unlock; @@ -438,10 +487,23 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * We are safe now. Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = pte_pgprot(old_pte); + old_prot = req_prot = pte_pgprot(old_pte); - pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); + + /* + * Set the PSE and GLOBAL flags only if the PRESENT flag is + * set otherwise pmd_present/pmd_huge will return true even on + * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL + * for the ancient hardware that doesn't support it. + */ + if (pgprot_val(req_prot) & _PAGE_PRESENT) + pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL; + else + pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); + + req_prot = canon_pgprot(req_prot); /* * old_pte points to the large page base address. So we need @@ -450,17 +512,17 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; - new_prot = static_protections(new_prot, address, pfn); + new_prot = static_protections(req_prot, address, pfn); /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - addr = address + PAGE_SIZE; - pfn++; - for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) { - pgprot_t chk_prot = static_protections(new_prot, addr, pfn); + addr = address & pmask; + pfn = pte_pfn(old_pte); + for (i = 0; i < (psize >> PAGE_SHIFT); i++, addr += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(req_prot, addr, pfn); if (pgprot_val(chk_prot) != pgprot_val(new_prot)) goto out_unlock; @@ -483,49 +545,44 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * that we limited the number of possible pages already to * the number of pages in the large page. */ - if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { + if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { /* * The address is aligned and the number of pages * covers the full page. */ - new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); + new_pte = pfn_pte(pte_pfn(old_pte), new_prot); __set_pmd_pte(kpte, address, new_pte); cpa->flags |= CPA_FLUSHTLB; do_split = 0; } out_unlock: - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return do_split; } -static int split_large_page(pte_t *kpte, unsigned long address) +static int +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, + struct page *base) { - unsigned long flags, pfn, pfninc = 1; + pte_t *pbase = (pte_t *)page_address(base); + unsigned long pfn, pfninc = 1; unsigned int i, level; - pte_t *pbase, *tmp; + pte_t *tmp; pgprot_t ref_prot; - struct page *base; - - if (!debug_pagealloc) - spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); - if (!debug_pagealloc) - spin_lock(&cpa_lock); - if (!base) - return -ENOMEM; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up for us already: */ - tmp = lookup_address(address, &level); - if (tmp != kpte) - goto out_unlock; + tmp = _lookup_address_cpa(cpa, address, &level); + if (tmp != kpte) { + spin_unlock(&pgd_lock); + return 1; + } - pbase = (pte_t *)page_address(base); paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); /* @@ -539,27 +596,40 @@ static int split_large_page(pte_t *kpte, unsigned long address) #ifdef CONFIG_X86_64 if (level == PG_LEVEL_1G) { pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; - pgprot_val(ref_prot) |= _PAGE_PSE; + /* + * Set the PSE flags only if the PRESENT flag is set + * otherwise pmd_present/pmd_huge will return true + * even on a non present pmd. + */ + if (pgprot_val(ref_prot) & _PAGE_PRESENT) + pgprot_val(ref_prot) |= _PAGE_PSE; + else + pgprot_val(ref_prot) &= ~_PAGE_PSE; } #endif /* + * Set the GLOBAL flags only if the PRESENT flag is set + * otherwise pmd/pte_present will return true even on a non + * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL + * for the ancient hardware that doesn't support it. + */ + if (pgprot_val(ref_prot) & _PAGE_PRESENT) + pgprot_val(ref_prot) |= _PAGE_GLOBAL; + else + pgprot_val(ref_prot) &= ~_PAGE_GLOBAL; + + /* * Get the target pfn from the original entry: */ pfn = pte_pfn(*kpte); for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) - set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot))); - if (address >= (unsigned long)__va(0) && - address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) + if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), + PFN_DOWN(__pa(address)) + 1)) split_page_count(level); -#ifdef CONFIG_X86_64 - if (address >= (unsigned long)__va(1UL<<32) && - address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) - split_page_count(level); -#endif - /* * Install the new, split up pagetable. * @@ -578,24 +648,420 @@ static int split_large_page(pte_t *kpte, unsigned long address) * going on. */ __flush_tlb_all(); + spin_unlock(&pgd_lock); - base = NULL; + return 0; +} + +static int split_large_page(struct cpa_data *cpa, pte_t *kpte, + unsigned long address) +{ + struct page *base; + + if (!debug_pagealloc) + spin_unlock(&cpa_lock); + base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); + if (!debug_pagealloc) + spin_lock(&cpa_lock); + if (!base) + return -ENOMEM; + + if (__split_large_page(cpa, kpte, address, base)) + __free_page(base); + + return 0; +} + +static bool try_to_free_pte_page(pte_t *pte) +{ + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) + if (!pte_none(pte[i])) + return false; + + free_page((unsigned long)pte); + return true; +} + +static bool try_to_free_pmd_page(pmd_t *pmd) +{ + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) + if (!pmd_none(pmd[i])) + return false; + + free_page((unsigned long)pmd); + return true; +} + +static bool try_to_free_pud_page(pud_t *pud) +{ + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) + if (!pud_none(pud[i])) + return false; + + free_page((unsigned long)pud); + return true; +} + +static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, start); + + while (start < end) { + set_pte(pte, __pte(0)); + + start += PAGE_SIZE; + pte++; + } + + if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { + pmd_clear(pmd); + return true; + } + return false; +} + +static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, + unsigned long start, unsigned long end) +{ + if (unmap_pte_range(pmd, start, end)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} + +static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, start); -out_unlock: /* - * If we dropped out via the lookup_address check under - * pgd_lock then stick the page back into the pool: + * Not on a 2MB page boundary? */ - if (base) - __free_page(base); - spin_unlock_irqrestore(&pgd_lock, flags); + if (start & (PMD_SIZE - 1)) { + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + __unmap_pmd_range(pud, pmd, start, pre_end); + + start = pre_end; + pmd++; + } + + /* + * Try to unmap in 2M chunks. + */ + while (end - start >= PMD_SIZE) { + if (pmd_large(*pmd)) + pmd_clear(pmd); + else + __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); + + start += PMD_SIZE; + pmd++; + } + + /* + * 4K leftovers? + */ + if (start < end) + return __unmap_pmd_range(pud, pmd, start, end); + + /* + * Try again to free the PMD page if haven't succeeded above. + */ + if (!pud_none(*pud)) + if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) + pud_clear(pud); +} + +static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) +{ + pud_t *pud = pud_offset(pgd, start); + + /* + * Not on a GB page boundary? + */ + if (start & (PUD_SIZE - 1)) { + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + unsigned long pre_end = min_t(unsigned long, end, next_page); + + unmap_pmd_range(pud, start, pre_end); + + start = pre_end; + pud++; + } + /* + * Try to unmap in 1G chunks? + */ + while (end - start >= PUD_SIZE) { + + if (pud_large(*pud)) + pud_clear(pud); + else + unmap_pmd_range(pud, start, start + PUD_SIZE); + + start += PUD_SIZE; + pud++; + } + + /* + * 2M leftovers? + */ + if (start < end) + unmap_pmd_range(pud, start, end); + + /* + * No need to try to free the PUD page because we'll free it in + * populate_pgd's error path + */ +} + +static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) +{ + pgd_t *pgd_entry = root + pgd_index(addr); + + unmap_pud_range(pgd_entry, addr, end); + + if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) + pgd_clear(pgd_entry); +} + +static int alloc_pte_page(pmd_t *pmd) +{ + pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pte) + return -1; + + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + return 0; +} + +static int alloc_pmd_page(pud_t *pud) +{ + pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pmd) + return -1; + + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + return 0; +} + +static void populate_pte(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, start); + + while (num_pages-- && start < end) { + + /* deal with the NX bit */ + if (!(pgprot_val(pgprot) & _PAGE_NX)) + cpa->pfn &= ~_PAGE_NX; + + set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); + + start += PAGE_SIZE; + cpa->pfn += PAGE_SIZE; + pte++; + } +} + +static int populate_pmd(struct cpa_data *cpa, + unsigned long start, unsigned long end, + unsigned num_pages, pud_t *pud, pgprot_t pgprot) +{ + unsigned int cur_pages = 0; + pmd_t *pmd; + + /* + * Not on a 2M boundary? + */ + if (start & (PMD_SIZE - 1)) { + unsigned long pre_end = start + (num_pages << PAGE_SHIFT); + unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; + + pre_end = min_t(unsigned long, pre_end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(unsigned int, num_pages, cur_pages); + + /* + * Need a PTE page? + */ + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); + + start = pre_end; + } + + /* + * We mapped them all? + */ + if (num_pages == cur_pages) + return cur_pages; + + while (end - start >= PMD_SIZE) { + + /* + * We cannot use a 1G page so allocate a PMD page if needed. + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + pmd = pmd_offset(pud, start); + + set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); + + start += PMD_SIZE; + cpa->pfn += PMD_SIZE; + cur_pages += PMD_SIZE >> PAGE_SHIFT; + } + + /* + * Map trailing 4K pages. + */ + if (start < end) { + pmd = pmd_offset(pud, start); + if (pmd_none(*pmd)) + if (alloc_pte_page(pmd)) + return -1; + + populate_pte(cpa, start, end, num_pages - cur_pages, + pmd, pgprot); + } + return num_pages; +} + +static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + pgprot_t pgprot) +{ + pud_t *pud; + unsigned long end; + int cur_pages = 0; + + end = start + (cpa->numpages << PAGE_SHIFT); + + /* + * Not on a Gb page boundary? => map everything up to it with + * smaller pages. + */ + if (start & (PUD_SIZE - 1)) { + unsigned long pre_end; + unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; + + pre_end = min_t(unsigned long, end, next_page); + cur_pages = (pre_end - start) >> PAGE_SHIFT; + cur_pages = min_t(int, (int)cpa->numpages, cur_pages); + + pud = pud_offset(pgd, start); + + /* + * Need a PMD page? + */ + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, + pud, pgprot); + if (cur_pages < 0) + return cur_pages; + + start = pre_end; + } + + /* We mapped them all? */ + if (cpa->numpages == cur_pages) + return cur_pages; + + pud = pud_offset(pgd, start); + + /* + * Map everything starting from the Gb boundary, possibly with 1G pages + */ + while (end - start >= PUD_SIZE) { + set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); + + start += PUD_SIZE; + cpa->pfn += PUD_SIZE; + cur_pages += PUD_SIZE >> PAGE_SHIFT; + pud++; + } + + /* Map trailing leftover */ + if (start < end) { + int tmp; + + pud = pud_offset(pgd, start); + if (pud_none(*pud)) + if (alloc_pmd_page(pud)) + return -1; + + tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, + pud, pgprot); + if (tmp < 0) + return cur_pages; + + cur_pages += tmp; + } + return cur_pages; +} + +/* + * Restrictions for kernel page table do not necessarily apply when mapping in + * an alternate PGD. + */ +static int populate_pgd(struct cpa_data *cpa, unsigned long addr) +{ + pgprot_t pgprot = __pgprot(_KERNPG_TABLE); + pud_t *pud = NULL; /* shut up gcc */ + pgd_t *pgd_entry; + int ret; + + pgd_entry = cpa->pgd + pgd_index(addr); + + /* + * Allocate a PUD page and hand it down for mapping. + */ + if (pgd_none(*pgd_entry)) { + pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); + if (!pud) + return -1; + + set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + + pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); + + ret = populate_pud(cpa, addr, pgd_entry, pgprot); + if (ret < 0) { + unmap_pgd_range(cpa->pgd, addr, + addr + (cpa->numpages << PAGE_SHIFT)); + return ret; + } + + cpa->numpages = ret; return 0; } static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, int primary) { + if (cpa->pgd) + return populate_pgd(cpa, vaddr); + /* * Ignore all non primary paths. */ @@ -640,7 +1106,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) else address = *cpa->vaddr; repeat: - kpte = lookup_address(address, &level); + kpte = _lookup_address_cpa(cpa, address, &level); if (!kpte) return __cpa_process_fault(cpa, address, primary); @@ -659,6 +1125,18 @@ repeat: new_prot = static_protections(new_prot, address, pfn); /* + * Set the GLOBAL flags only if the PRESENT flag is + * set otherwise pte_present will return true even on + * a non present pte. The canon_pgprot will clear + * _PAGE_GLOBAL for the ancient hardware that doesn't + * support it. + */ + if (pgprot_val(new_prot) & _PAGE_PRESENT) + pgprot_val(new_prot) |= _PAGE_GLOBAL; + else + pgprot_val(new_prot) &= ~_PAGE_GLOBAL; + + /* * We need to keep the pfn from the existing PTE, * after all we're only going to change it's attributes * not the memory it points to @@ -692,7 +1170,7 @@ repeat: /* * We have to split the large page: */ - err = split_large_page(kpte, address); + err = split_large_page(cpa, kpte, address); if (!err) { /* * Do a global flush tlb after splitting the large page @@ -728,13 +1206,9 @@ static int cpa_process_alias(struct cpa_data *cpa) unsigned long vaddr; int ret; - if (cpa->pfn >= max_pfn_mapped) + if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) return 0; -#ifdef CONFIG_X86_64 - if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) - return 0; -#endif /* * No need to redo, when the primary call touched the direct * mapping already: @@ -845,6 +1319,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, int ret, cache, checkalias; unsigned long baddr = 0; + memset(&cpa, 0, sizeof(cpa)); + /* * Check, if we are requested to change a not supported * feature: @@ -917,10 +1393,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cache = cache_attr(mask_set); /* - * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the + * On success we use CLFLUSH, when the CPU supports it to + * avoid the WBINVD. If the CPU does not support it and in the * error case we fall back to cpa_flush_all (which uses - * wbindv): + * WBINVD): */ if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { @@ -997,7 +1473,7 @@ out_err: } EXPORT_SYMBOL(set_memory_uc); -int _set_memory_array(unsigned long *addr, int addrinarray, +static int _set_memory_array(unsigned long *addr, int addrinarray, unsigned long new_type) { int i, j; @@ -1291,6 +1767,7 @@ static int __set_pages_p(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, .numpages = numpages, .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), .mask_clr = __pgprot(0), @@ -1309,6 +1786,7 @@ static int __set_pages_np(struct page *page, int numpages) { unsigned long tempaddr = (unsigned long) page_address(page); struct cpa_data cpa = { .vaddr = &tempaddr, + .pgd = NULL, .numpages = numpages, .mask_set = __pgprot(0), .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), @@ -1333,12 +1811,6 @@ void kernel_map_pages(struct page *page, int numpages, int enable) } /* - * If page allocator is not up yet then do not call c_p_a(): - */ - if (!debug_pagealloc_enabled) - return; - - /* * The return value is ignored as the calls cannot fail. * Large pages for identity mappings are not used at boot time * and hence no memory allocations during large page split. @@ -1353,6 +1825,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) * but that can deadlock->flush only current cpu: */ __flush_tlb_all(); + + arch_flush_lazy_mmu_mode(); } #ifdef CONFIG_HIBERNATION @@ -1373,6 +1847,42 @@ bool kernel_page_present(struct page *page) #endif /* CONFIG_DEBUG_PAGEALLOC */ +int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, + unsigned numpages, unsigned long page_flags) +{ + int retval = -EINVAL; + + struct cpa_data cpa = { + .vaddr = &address, + .pfn = pfn, + .pgd = pgd, + .numpages = numpages, + .mask_set = __pgprot(0), + .mask_clr = __pgprot(0), + .flags = 0, + }; + + if (!(__supported_pte_mask & _PAGE_NX)) + goto out; + + if (!(page_flags & _PAGE_NX)) + cpa.mask_clr = __pgprot(_PAGE_NX); + + cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); + + retval = __change_page_attr_set_clr(&cpa, 0); + __flush_tlb_all(); + +out: + return retval; +} + +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, + unsigned numpages) +{ + unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); +} + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. |
