diff options
| author | Wim Van Sebroeck <wim@iguana.be> | 2007-05-11 19:03:13 +0000 | 
|---|---|---|
| committer | Wim Van Sebroeck <wim@iguana.be> | 2007-05-11 19:03:13 +0000 | 
| commit | 5c34202b8bf942da411b6599668a76b07449bbfd (patch) | |
| tree | 5719c361321eaddc8e4f1b0c8a7994f0e9a6fdd3 /arch/i386/mm/init.c | |
| parent | 0d4804b31f91cfbcff6d62af0bc09a893a1c8ae0 (diff) | |
| parent | 1f8a6b658a943b4f04a1fc7b3a420360202c86cd (diff) | |
Merge /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/i386/mm/init.c')
| -rw-r--r-- | arch/i386/mm/init.c | 188 | 
1 files changed, 126 insertions, 62 deletions
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index ae436882af7..c50782efa5c 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -22,6 +22,7 @@  #include <linux/init.h>  #include <linux/highmem.h>  #include <linux/pagemap.h> +#include <linux/pfn.h>  #include <linux/poison.h>  #include <linux/bootmem.h>  #include <linux/slab.h> @@ -42,6 +43,7 @@  #include <asm/tlb.h>  #include <asm/tlbflush.h>  #include <asm/sections.h> +#include <asm/paravirt.h>  unsigned int __VMALLOC_RESERVE = 128 << 20; @@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)  	pmd_t *pmd_table;  #ifdef CONFIG_X86_PAE -	pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); -	paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); -	set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); -	pud = pud_offset(pgd, 0); -	if (pmd_table != pmd_offset(pud, 0))  -		BUG(); -#else +	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { +		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); + +		paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); +		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); +		pud = pud_offset(pgd, 0); +		if (pmd_table != pmd_offset(pud, 0)) +			BUG(); +	} +#endif  	pud = pud_offset(pgd, 0);  	pmd_table = pmd_offset(pud, 0); -#endif -  	return pmd_table;  } @@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)   */  static pte_t * __init one_page_table_init(pmd_t *pmd)  { -	if (pmd_none(*pmd)) { +	if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {  		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); +  		paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);  		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); -		if (page_table != pte_offset_kernel(pmd, 0)) -			BUG();	 - -		return page_table; +		BUG_ON(page_table != pte_offset_kernel(pmd, 0));  	}  	return pte_offset_kernel(pmd, 0); @@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)  static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)  {  	pgd_t *pgd; -	pud_t *pud;  	pmd_t *pmd;  	int pgd_idx, pmd_idx;  	unsigned long vaddr; @@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end  	pgd = pgd_base + pgd_idx;  	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { -		if (pgd_none(*pgd))  -			one_md_table_init(pgd); -		pud = pud_offset(pgd, vaddr); -		pmd = pmd_offset(pud, vaddr); +		pmd = one_md_table_init(pgd); +		pmd = pmd + pmd_index(vaddr);  		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { -			if (pmd_none(*pmd))  -				one_page_table_init(pmd); +			one_page_table_init(pmd);  			vaddr += PMD_SIZE;  		} @@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)  			/* Map with big pages if possible, otherwise create normal page tables. */  			if (cpu_has_pse) {  				unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; -  				if (is_kernel_text(address) || is_kernel_text(address2))  					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));  				else  					set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); +  				pfn += PTRS_PER_PTE;  			} else {  				pte = one_page_table_init(pmd); -				for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { -						if (is_kernel_text(address)) -							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); -						else -							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); +				for (pte_ofs = 0; +				     pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; +				     pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { +					if (is_kernel_text(address)) +						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); +					else +						set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));  				}  			}  		} @@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);  #define remap_numa_kva() do {} while (0)  #endif -static void __init pagetable_init (void) +void __init native_pagetable_setup_start(pgd_t *base)  { -	unsigned long vaddr; -	pgd_t *pgd_base = swapper_pg_dir; -  #ifdef CONFIG_X86_PAE  	int i; -	/* Init entries of the first-level page table to the zero page */ -	for (i = 0; i < PTRS_PER_PGD; i++) -		set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); + +	/* +	 * Init entries of the first-level page table to the +	 * zero page, if they haven't already been set up. +	 * +	 * In a normal native boot, we'll be running on a +	 * pagetable rooted in swapper_pg_dir, but not in PAE +	 * mode, so this will end up clobbering the mappings +	 * for the lower 24Mbytes of the address space, +	 * without affecting the kernel address space. +	 */ +	for (i = 0; i < USER_PTRS_PER_PGD; i++) +		set_pgd(&base[i], +			__pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); + +	/* Make sure kernel address space is empty so that a pagetable +	   will be allocated for it. */ +	memset(&base[USER_PTRS_PER_PGD], 0, +	       KERNEL_PGD_PTRS * sizeof(pgd_t));  #else  	paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);  #endif +} + +void __init native_pagetable_setup_done(pgd_t *base) +{ +#ifdef CONFIG_X86_PAE +	/* +	 * Add low memory identity-mappings - SMP needs it when +	 * starting up on an AP from real-mode. In the non-PAE +	 * case we already have these mappings through head.S. +	 * All user-space mappings are explicitly cleared after +	 * SMP startup. +	 */ +	set_pgd(&base[0], base[USER_PTRS_PER_PGD]); +#endif +} + +/* + * Build a proper pagetable for the kernel mappings.  Up until this + * point, we've been running on some set of pagetables constructed by + * the boot process. + * + * If we're booting on native hardware, this will be a pagetable + * constructed in arch/i386/kernel/head.S, and not running in PAE mode + * (even if we'll end up running in PAE).  The root of the pagetable + * will be swapper_pg_dir. + * + * If we're booting paravirtualized under a hypervisor, then there are + * more options: we may already be running PAE, and the pagetable may + * or may not be based in swapper_pg_dir.  In any case, + * paravirt_pagetable_setup_start() will set up swapper_pg_dir + * appropriately for the rest of the initialization to work. + * + * In general, pagetable_init() assumes that the pagetable may already + * be partially populated, and so it avoids stomping on any existing + * mappings. + */ +static void __init pagetable_init (void) +{ +	unsigned long vaddr, end; +	pgd_t *pgd_base = swapper_pg_dir; + +	paravirt_pagetable_setup_start(pgd_base);  	/* Enable PSE if available */ -	if (cpu_has_pse) { +	if (cpu_has_pse)  		set_in_cr4(X86_CR4_PSE); -	}  	/* Enable PGE if available */  	if (cpu_has_pge) { @@ -371,20 +424,12 @@ static void __init pagetable_init (void)  	 * created - mappings will be set by set_fixmap():  	 */  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -	page_table_range_init(vaddr, 0, pgd_base); +	end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; +	page_table_range_init(vaddr, end, pgd_base);  	permanent_kmaps_init(pgd_base); -#ifdef CONFIG_X86_PAE -	/* -	 * Add low memory identity-mappings - SMP needs it when -	 * starting up on an AP from real-mode. In the non-PAE -	 * case we already have these mappings through head.S. -	 * All user-space mappings are explicitly cleared after -	 * SMP startup. -	 */ -	set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); -#endif +	paravirt_pagetable_setup_done(pgd_base);  }  #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) @@ -700,24 +745,31 @@ struct kmem_cache *pmd_cache;  void __init pgtable_cache_init(void)  { +	size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); +  	if (PTRS_PER_PMD > 1) {  		pmd_cache = kmem_cache_create("pmd",  					PTRS_PER_PMD*sizeof(pmd_t),  					PTRS_PER_PMD*sizeof(pmd_t), -					0, +					SLAB_PANIC,  					pmd_ctor,  					NULL); -		if (!pmd_cache) -			panic("pgtable_cache_init(): cannot create pmd cache"); +		if (!SHARED_KERNEL_PMD) { +			/* If we're in PAE mode and have a non-shared +			   kernel pmd, then the pgd size must be a +			   page size.  This is because the pgd_list +			   links through the page structure, so there +			   can only be one pgd per page for this to +			   work. */ +			pgd_size = PAGE_SIZE; +		}  	}  	pgd_cache = kmem_cache_create("pgd", -				PTRS_PER_PGD*sizeof(pgd_t), -				PTRS_PER_PGD*sizeof(pgd_t), -				0, +				pgd_size, +				pgd_size, +				SLAB_PANIC,  				pgd_ctor, -				PTRS_PER_PMD == 1 ? pgd_dtor : NULL); -	if (!pgd_cache) -		panic("pgtable_cache_init(): Cannot create pgd cache"); +				(!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);  }  /* @@ -751,13 +803,25 @@ static int noinline do_test_wp_bit(void)  void mark_rodata_ro(void)  { -	unsigned long addr = (unsigned long)__start_rodata; +	unsigned long start = PFN_ALIGN(_text); +	unsigned long size = PFN_ALIGN(_etext) - start; -	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) -		change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); +#ifdef CONFIG_HOTPLUG_CPU +	/* It must still be possible to apply SMP alternatives. */ +	if (num_possible_cpus() <= 1) +#endif +	{ +		change_page_attr(virt_to_page(start), +		                 size >> PAGE_SHIFT, PAGE_KERNEL_RX); +		printk("Write protecting the kernel text: %luk\n", size >> 10); +	} -	printk("Write protecting the kernel read-only data: %uk\n", -			(__end_rodata - __start_rodata) >> 10); +	start += size; +	size = (unsigned long)__end_rodata - start; +	change_page_attr(virt_to_page(start), +	                 size >> PAGE_SHIFT, PAGE_KERNEL_RO); +	printk("Write protecting the kernel read-only data: %luk\n", +	       size >> 10);  	/*  	 * change_page_attr() requires a global_flush_tlb() call after it. @@ -780,7 +844,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)  		free_page(addr);  		totalram_pages++;  	} -	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);  }  void free_initmem(void)  | 
