Diffstat (limited to 'arch/x86/mm/init.c')
-rw-r--r--	arch/x86/mm/init.c	636
1 file changed, 457 insertions(+), 179 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c0e28a13de7..f9713061811 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -3,6 +3,7 @@  #include <linux/ioport.h>  #include <linux/swap.h>  #include <linux/memblock.h> +#include <linux/bootmem.h>	/* for max_low_pfn */  #include <asm/cacheflush.h>  #include <asm/e820.h> @@ -11,83 +12,103 @@  #include <asm/page_types.h>  #include <asm/sections.h>  #include <asm/setup.h> -#include <asm/system.h>  #include <asm/tlbflush.h>  #include <asm/tlb.h>  #include <asm/proto.h> +#include <asm/dma.h>		/* for MAX_DMA_PFN */ +#include <asm/microcode.h> -DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +#include "mm_internal.h" -unsigned long __initdata e820_table_start; -unsigned long __meminitdata e820_table_end; -unsigned long __meminitdata e820_table_top; +static unsigned long __initdata pgt_buf_start; +static unsigned long __initdata pgt_buf_end; +static unsigned long __initdata pgt_buf_top; -int after_bootmem; +static unsigned long min_pfn_mapped; -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES -				= 1 -#endif -; +static bool __initdata can_use_brk_pgt = true; -static void __init find_early_table_space(unsigned long end, int use_pse, -					  int use_gbpages) +/* + * Pages returned are already directly mapped. + * + * Changing that is likely to break Xen, see commit: + * + *    279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve + * + * for detailed information. + */ +__ref void *alloc_low_pages(unsigned int num)  { -	unsigned long puds, pmds, ptes, tables, start; -	phys_addr_t base; +	unsigned long pfn; +	int i; -	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; -	tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); +	if (after_bootmem) { +		unsigned int order; -	if (use_gbpages) { -		unsigned long extra; +		order = get_order((unsigned long)num << PAGE_SHIFT); +		return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | +						__GFP_ZERO, order); +	} -		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); -		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; -	} else -		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; +	if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { +		unsigned long ret; +		if (min_pfn_mapped >= max_pfn_mapped) +			panic("alloc_low_pages: ran out of memory"); +		ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, +					max_pfn_mapped << PAGE_SHIFT, +					PAGE_SIZE * num , PAGE_SIZE); +		if (!ret) +			panic("alloc_low_pages: can not alloc memory"); +		memblock_reserve(ret, PAGE_SIZE * num); +		pfn = ret >> PAGE_SHIFT; +	} else { +		pfn = pgt_buf_end; +		pgt_buf_end += num; +		printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", +			pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); +	} -	tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); +	for (i = 0; i < num; i++) { +		void *adr; -	if (use_pse) { -		unsigned long extra; +		adr = __va((pfn + i) << PAGE_SHIFT); +		clear_page(adr); +	} -		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 -		extra += PMD_SIZE; -#endif -		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; -	} else -		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; +	return __va(pfn << PAGE_SHIFT); +} -	tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); +/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */ +#define INIT_PGT_BUF_SIZE	(6 * PAGE_SIZE) +RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); +void  __init early_alloc_pgt_buf(void) +{ +	unsigned long tables = INIT_PGT_BUF_SIZE; +	phys_addr_t base; -#ifdef CONFIG_X86_32 -	/* for fixmap */ -	tables += 
roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif +	base = __pa(extend_brk(tables, PAGE_SIZE)); -	/* -	 * RED-PEN putting page tables only on node 0 could -	 * cause a hotspot and fill up ZONE_DMA. The page tables -	 * need roughly 0.5KB per GB. -	 */ -#ifdef CONFIG_X86_32 -	start = 0x7000; -#else -	start = 0x8000; -#endif -	base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT, -					tables, PAGE_SIZE); -	if (base == MEMBLOCK_ERROR) -		panic("Cannot find space for the kernel page tables"); +	pgt_buf_start = base >> PAGE_SHIFT; +	pgt_buf_end = pgt_buf_start; +	pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); +} + +int after_bootmem; -	e820_table_start = base >> PAGE_SHIFT; -	e820_table_end = e820_table_start; -	e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); +int direct_gbpages +#ifdef CONFIG_DIRECT_GBPAGES +				= 1 +#endif +; -	printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", -		end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); +static void __init init_gbpages(void) +{ +#ifdef CONFIG_X86_64 +	if (direct_gbpages && cpu_has_gbpages) +		printk(KERN_INFO "Using GB pages for direct mapping\n"); +	else +		direct_gbpages = 0; +#endif  }  struct map_range { @@ -96,6 +117,35 @@ struct map_range {  	unsigned page_size_mask;  }; +static int page_size_mask; + +static void __init probe_page_size_mask(void) +{ +	init_gbpages(); + +#if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) +	/* +	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. +	 * This will simplify cpa(), which otherwise needs to support splitting +	 * large pages into small in interrupt context, etc. +	 */ +	if (direct_gbpages) +		page_size_mask |= 1 << PG_LEVEL_1G; +	if (cpu_has_pse) +		page_size_mask |= 1 << PG_LEVEL_2M; +#endif + +	/* Enable PSE if available */ +	if (cpu_has_pse) +		set_in_cr4(X86_CR4_PSE); + +	/* Enable PGE if available */ +	if (cpu_has_pge) { +		set_in_cr4(X86_CR4_PGE); +		__supported_pte_mask |= _PAGE_GLOBAL; +	} +} +  #ifdef CONFIG_X86_32  #define NR_RANGE_MR 3  #else /* CONFIG_X86_64 */ @@ -119,57 +169,51 @@ static int __meminit save_mr(struct map_range *mr, int nr_range,  }  /* - * Setup the direct mapping of the physical memory at PAGE_OFFSET. - * This runs before bootmem is initialized and gets pages directly from - * the physical memory. To access them they are temporarily mapped. + * adjust the page_size_mask for small range to go with + *	big page size instead small one if nearby are ram too.   */ -unsigned long __init_refok init_memory_mapping(unsigned long start, -					       unsigned long end) +static void __init_refok adjust_range_page_size_mask(struct map_range *mr, +							 int nr_range)  { -	unsigned long page_size_mask = 0; -	unsigned long start_pfn, end_pfn; -	unsigned long ret = 0; -	unsigned long pos; +	int i; -	struct map_range mr[NR_RANGE_MR]; -	int nr_range, i; -	int use_pse, use_gbpages; +	for (i = 0; i < nr_range; i++) { +		if ((page_size_mask & (1<<PG_LEVEL_2M)) && +		    !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { +			unsigned long start = round_down(mr[i].start, PMD_SIZE); +			unsigned long end = round_up(mr[i].end, PMD_SIZE); -	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); - -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) -	/* -	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. -	 * This will simplify cpa(), which otherwise needs to support splitting -	 * large pages into small in interrupt context, etc. 
-	 */ -	use_pse = use_gbpages = 0; -#else -	use_pse = cpu_has_pse; -	use_gbpages = direct_gbpages; +#ifdef CONFIG_X86_32 +			if ((end >> PAGE_SHIFT) > max_low_pfn) +				continue;  #endif -	/* Enable PSE if available */ -	if (cpu_has_pse) -		set_in_cr4(X86_CR4_PSE); - -	/* Enable PGE if available */ -	if (cpu_has_pge) { -		set_in_cr4(X86_CR4_PGE); -		__supported_pte_mask |= _PAGE_GLOBAL; +			if (memblock_is_region_memory(start, end - start)) +				mr[i].page_size_mask |= 1<<PG_LEVEL_2M; +		} +		if ((page_size_mask & (1<<PG_LEVEL_1G)) && +		    !(mr[i].page_size_mask & (1<<PG_LEVEL_1G))) { +			unsigned long start = round_down(mr[i].start, PUD_SIZE); +			unsigned long end = round_up(mr[i].end, PUD_SIZE); + +			if (memblock_is_region_memory(start, end - start)) +				mr[i].page_size_mask |= 1<<PG_LEVEL_1G; +		}  	} +} -	if (use_gbpages) -		page_size_mask |= 1 << PG_LEVEL_1G; -	if (use_pse) -		page_size_mask |= 1 << PG_LEVEL_2M; +static int __meminit split_mem_range(struct map_range *mr, int nr_range, +				     unsigned long start, +				     unsigned long end) +{ +	unsigned long start_pfn, end_pfn, limit_pfn; +	unsigned long pfn; +	int i; -	memset(mr, 0, sizeof(mr)); -	nr_range = 0; +	limit_pfn = PFN_DOWN(end);  	/* head if not big page alignment ? */ -	start_pfn = start >> PAGE_SHIFT; -	pos = start_pfn << PAGE_SHIFT; +	pfn = start_pfn = PFN_DOWN(start);  #ifdef CONFIG_X86_32  	/*  	 * Don't use a large page for the first 2/4MB of memory @@ -177,68 +221,65 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,  	 * and overlapping MTRRs into large pages can cause  	 * slowdowns.  	 */ -	if (pos == 0) -		end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); +	if (pfn == 0) +		end_pfn = PFN_DOWN(PMD_SIZE);  	else -		end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) -				 << (PMD_SHIFT - PAGE_SHIFT); +		end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));  #else /* CONFIG_X86_64 */ -	end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) -			<< (PMD_SHIFT - PAGE_SHIFT); +	end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));  #endif -	if (end_pfn > (end >> PAGE_SHIFT)) -		end_pfn = end >> PAGE_SHIFT; +	if (end_pfn > limit_pfn) +		end_pfn = limit_pfn;  	if (start_pfn < end_pfn) {  		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); -		pos = end_pfn << PAGE_SHIFT; +		pfn = end_pfn;  	}  	/* big page (2M) range */ -	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) -			 << (PMD_SHIFT - PAGE_SHIFT); +	start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE));  #ifdef CONFIG_X86_32 -	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +	end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));  #else /* CONFIG_X86_64 */ -	end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) -			 << (PUD_SHIFT - PAGE_SHIFT); -	if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) -		end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +	end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); +	if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) +		end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));  #endif  	if (start_pfn < end_pfn) {  		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,  				page_size_mask & (1<<PG_LEVEL_2M)); -		pos = end_pfn << PAGE_SHIFT; +		pfn = end_pfn;  	}  #ifdef CONFIG_X86_64  	/* big page (1G) range */ -	start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) -			 << (PUD_SHIFT - PAGE_SHIFT); -	end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); +	start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); +	end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE));  	if (start_pfn < end_pfn) {  		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,  
				page_size_mask &  				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); -		pos = end_pfn << PAGE_SHIFT; +		pfn = end_pfn;  	}  	/* tail is not big page (1G) alignment */ -	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) -			 << (PMD_SHIFT - PAGE_SHIFT); -	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +	start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); +	end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE));  	if (start_pfn < end_pfn) {  		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,  				page_size_mask & (1<<PG_LEVEL_2M)); -		pos = end_pfn << PAGE_SHIFT; +		pfn = end_pfn;  	}  #endif  	/* tail is not big page (2M) alignment */ -	start_pfn = pos>>PAGE_SHIFT; -	end_pfn = end>>PAGE_SHIFT; +	start_pfn = pfn; +	end_pfn = limit_pfn;  	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); +	if (!after_bootmem) +		adjust_range_page_size_mask(mr, nr_range); +  	/* try to merge same page size and continuous */  	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {  		unsigned long old_start; @@ -254,62 +295,279 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,  	}  	for (i = 0; i < nr_range; i++) -		printk(KERN_DEBUG " %010lx - %010lx page %s\n", -				mr[i].start, mr[i].end, +		printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n", +				mr[i].start, mr[i].end - 1,  			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(  			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); -	/* -	 * Find space for the kernel direct mapping tables. -	 * -	 * Later we should allocate these tables in the local node of the -	 * memory mapped. Unfortunately this is done currently before the -	 * nodes are discovered. -	 */ -	if (!after_bootmem) -		find_early_table_space(end, use_pse, use_gbpages); +	return nr_range; +} + +struct range pfn_mapped[E820_X_MAX]; +int nr_pfn_mapped; + +static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ +	nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, +					     nr_pfn_mapped, start_pfn, end_pfn); +	nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); + +	max_pfn_mapped = max(max_pfn_mapped, end_pfn); + +	if (start_pfn < (1UL<<(32-PAGE_SHIFT))) +		max_low_pfn_mapped = max(max_low_pfn_mapped, +					 min(end_pfn, 1UL<<(32-PAGE_SHIFT))); +} + +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ +	int i; + +	for (i = 0; i < nr_pfn_mapped; i++) +		if ((start_pfn >= pfn_mapped[i].start) && +		    (end_pfn <= pfn_mapped[i].end)) +			return true; + +	return false; +} + +/* + * Setup the direct mapping of the physical memory at PAGE_OFFSET. + * This runs before bootmem is initialized and gets pages directly from + * the physical memory. To access them they are temporarily mapped. 
+ */ +unsigned long __init_refok init_memory_mapping(unsigned long start, +					       unsigned long end) +{ +	struct map_range mr[NR_RANGE_MR]; +	unsigned long ret = 0; +	int nr_range, i; + +	pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n", +	       start, end - 1); + +	memset(mr, 0, sizeof(mr)); +	nr_range = split_mem_range(mr, 0, start, end);  	for (i = 0; i < nr_range; i++)  		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,  						   mr[i].page_size_mask); -#ifdef CONFIG_X86_32 -	early_ioremap_page_table_range_init(); +	add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); -	load_cr3(swapper_pg_dir); -#endif +	return ret >> PAGE_SHIFT; +} -#ifdef CONFIG_X86_64 -	if (!after_bootmem && !start) { -		pud_t *pud; -		pmd_t *pmd; +/* + * We need to iterate through the E820 memory map and create direct mappings + * for only E820_RAM and E820_KERN_RESERVED regions. We cannot simply + * create direct mappings for all pfns from [0 to max_low_pfn) and + * [4GB to max_pfn) because of possible memory holes in high addresses + * that cannot be marked as UC by fixed/variable range MTRRs. + * Depending on the alignment of E820 ranges, this may possibly result + * in using smaller size (i.e. 4K instead of 2M or 1G) page tables. + * + * init_mem_mapping() calls init_range_memory_mapping() with big range. + * That range would have hole in the middle or ends, and only ram parts + * will be mapped in init_range_memory_mapping(). + */ +static unsigned long __init init_range_memory_mapping( +					   unsigned long r_start, +					   unsigned long r_end) +{ +	unsigned long start_pfn, end_pfn; +	unsigned long mapped_ram_size = 0; +	int i; -		mmu_cr4_features = read_cr4(); +	for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { +		u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); +		u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); +		if (start >= end) +			continue;  		/* -		 * _brk_end cannot change anymore, but it and _end may be -		 * located on different 2M pages. cleanup_highmap(), however, -		 * can only consider _end when it runs, so destroy any -		 * mappings beyond _brk_end here. +		 * if it is overlapping with brk pgt, we need to +		 * alloc pgt buf from memblock instead.  		 */ -		pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); -		pmd = pmd_offset(pud, _brk_end - 1); -		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) -			pmd_clear(pmd); +		can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= +				    min(end, (u64)pgt_buf_top<<PAGE_SHIFT); +		init_memory_mapping(start, end); +		mapped_ram_size += end - start; +		can_use_brk_pgt = true;  	} -#endif -	__flush_tlb_all(); -	if (!after_bootmem && e820_table_end > e820_table_start) -		memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, -				 e820_table_end << PAGE_SHIFT, "PGTABLE"); +	return mapped_ram_size; +} -	if (!after_bootmem) -		early_memtest(start, end); +static unsigned long __init get_new_step_size(unsigned long step_size) +{ +	/* +	 * Explain why we shift by 5 and why we don't have to worry about +	 * 'step_size << 5' overflowing: +	 * +	 * initial mapped size is PMD_SIZE (2M). +	 * We can not set step_size to be PUD_SIZE (1G) yet. +	 * In worse case, when we cross the 1G boundary, and +	 * PG_LEVEL_2M is not set, we will need 1+1+512 pages (2M + 8k) +	 * to map 1G range with PTE. Use 5 as shift for now. +	 * +	 * Don't need to worry about overflow, on 32bit, when step_size +	 * is 0, round_down() returns 0 for start, and that turns it +	 * into 0x100000000ULL. 
+	 */ +	return step_size << 5; +} -	return ret >> PAGE_SHIFT; +/** + * memory_map_top_down - Map [map_start, map_end) top down + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in top-down. That said, the page tables + * will be allocated at the end of the memory, and we map the + * memory in top-down. + */ +static void __init memory_map_top_down(unsigned long map_start, +				       unsigned long map_end) +{ +	unsigned long real_end, start, last_start; +	unsigned long step_size; +	unsigned long addr; +	unsigned long mapped_ram_size = 0; +	unsigned long new_mapped_ram_size; + +	/* xen has big range in reserved near end of ram, skip it at first.*/ +	addr = memblock_find_in_range(map_start, map_end, PMD_SIZE, PMD_SIZE); +	real_end = addr + PMD_SIZE; + +	/* step_size need to be small so pgt_buf from BRK could cover it */ +	step_size = PMD_SIZE; +	max_pfn_mapped = 0; /* will get exact value next */ +	min_pfn_mapped = real_end >> PAGE_SHIFT; +	last_start = start = real_end; + +	/* +	 * We start from the top (end of memory) and go to the bottom. +	 * The memblock_find_in_range() gets us a block of RAM from the +	 * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages +	 * for page table. +	 */ +	while (last_start > map_start) { +		if (last_start > step_size) { +			start = round_down(last_start - 1, step_size); +			if (start < map_start) +				start = map_start; +		} else +			start = map_start; +		new_mapped_ram_size = init_range_memory_mapping(start, +							last_start); +		last_start = start; +		min_pfn_mapped = last_start >> PAGE_SHIFT; +		/* only increase step_size after big range get mapped */ +		if (new_mapped_ram_size > mapped_ram_size) +			step_size = get_new_step_size(step_size); +		mapped_ram_size += new_mapped_ram_size; +	} + +	if (real_end < map_end) +		init_range_memory_mapping(real_end, map_end);  } +/** + * memory_map_bottom_up - Map [map_start, map_end) bottom up + * @map_start: start address of the target memory range + * @map_end: end address of the target memory range + * + * This function will setup direct mapping for memory range + * [map_start, map_end) in bottom-up. Since we have limited the + * bottom-up allocation above the kernel, the page tables will + * be allocated just above the kernel and we map the memory + * in [map_start, map_end) in bottom-up. + */ +static void __init memory_map_bottom_up(unsigned long map_start, +					unsigned long map_end) +{ +	unsigned long next, new_mapped_ram_size, start; +	unsigned long mapped_ram_size = 0; +	/* step_size need to be small so pgt_buf from BRK could cover it */ +	unsigned long step_size = PMD_SIZE; + +	start = map_start; +	min_pfn_mapped = start >> PAGE_SHIFT; + +	/* +	 * We start from the bottom (@map_start) and go to the top (@map_end). +	 * The memblock_find_in_range() gets us a block of RAM from the +	 * end of RAM in [min_pfn_mapped, max_pfn_mapped) used as new pages +	 * for page table. 
+	 */ +	while (start < map_end) { +		if (map_end - start > step_size) { +			next = round_up(start + 1, step_size); +			if (next > map_end) +				next = map_end; +		} else +			next = map_end; + +		new_mapped_ram_size = init_range_memory_mapping(start, next); +		start = next; + +		if (new_mapped_ram_size > mapped_ram_size) +			step_size = get_new_step_size(step_size); +		mapped_ram_size += new_mapped_ram_size; +	} +} + +void __init init_mem_mapping(void) +{ +	unsigned long end; + +	probe_page_size_mask(); + +#ifdef CONFIG_X86_64 +	end = max_pfn << PAGE_SHIFT; +#else +	end = max_low_pfn << PAGE_SHIFT; +#endif + +	/* the ISA range is always mapped regardless of memory holes */ +	init_memory_mapping(0, ISA_END_ADDRESS); + +	/* +	 * If the allocation is in bottom-up direction, we setup direct mapping +	 * in bottom-up, otherwise we setup direct mapping in top-down. +	 */ +	if (memblock_bottom_up()) { +		unsigned long kernel_end = __pa_symbol(_end); + +		/* +		 * we need two separate calls here. This is because we want to +		 * allocate page tables above the kernel. So we first map +		 * [kernel_end, end) to make memory above the kernel be mapped +		 * as soon as possible. And then use page tables allocated above +		 * the kernel to map [ISA_END_ADDRESS, kernel_end). +		 */ +		memory_map_bottom_up(kernel_end, end); +		memory_map_bottom_up(ISA_END_ADDRESS, kernel_end); +	} else { +		memory_map_top_down(ISA_END_ADDRESS, end); +	} + +#ifdef CONFIG_X86_64 +	if (max_pfn > max_low_pfn) { +		/* can we preseve max_low_pfn ?*/ +		max_low_pfn = max_pfn; +	} +#else +	early_ioremap_page_table_range_init(); +#endif + +	load_cr3(swapper_pg_dir); +	__flush_tlb_all(); + +	early_memtest(0, max_pfn_mapped << PAGE_SHIFT); +}  /*   * devmem_is_allowed() checks to see if /dev/mem access to a certain address @@ -323,7 +581,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,   */  int devmem_is_allowed(unsigned long pagenr)  { -	if (pagenr <= 256) +	if (pagenr < 256)  		return 1;  	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))  		return 0; @@ -334,7 +592,6 @@ int devmem_is_allowed(unsigned long pagenr)  void free_init_pages(char *what, unsigned long begin, unsigned long end)  { -	unsigned long addr;  	unsigned long begin_aligned, end_aligned;  	/* Make sure boundaries are page aligned */ @@ -349,47 +606,47 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)  	if (begin >= end)  		return; -	addr = begin; -  	/*  	 * If debugging page accesses then do not free this memory but  	 * mark them not present - any buggy init-section access will  	 * create a kernel page fault:  	 */  #ifdef CONFIG_DEBUG_PAGEALLOC -	printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", -		begin, end); +	printk(KERN_INFO "debug: unmapping init [mem %#010lx-%#010lx]\n", +		begin, end - 1);  	set_memory_np(begin, (end - begin) >> PAGE_SHIFT);  #else  	/*  	 * We just marked the kernel text read only above, now that  	 * we are going to free part of that, we need to make that -	 * writeable first. +	 * writeable and non-executable first.  	 
*/ +	set_memory_nx(begin, (end - begin) >> PAGE_SHIFT);  	set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); -	printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); - -	for (; addr < end; addr += PAGE_SIZE) { -		ClearPageReserved(virt_to_page(addr)); -		init_page_count(virt_to_page(addr)); -		memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); -		free_page(addr); -		totalram_pages++; -	} +	free_reserved_area((void *)begin, (void *)end, POISON_FREE_INITMEM, what);  #endif  }  void free_initmem(void)  { -	free_init_pages("unused kernel memory", +	free_init_pages("unused kernel",  			(unsigned long)(&__init_begin),  			(unsigned long)(&__init_end));  }  #ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end)  { +#ifdef CONFIG_MICROCODE_EARLY +	/* +	 * Remember, initrd memory may contain microcode or other useful things. +	 * Before we lose initrd mem, we need to find a place to hold them +	 * now that normal virtual memory is enabled. +	 */ +	save_microcode_in_initrd(); +#endif +  	/*  	 * end could be not aligned, and We can not align that,  	 * decompresser could be confused by aligned initrd_end @@ -399,6 +656,27 @@ void free_initrd_mem(unsigned long start, unsigned long end)  	 *   - relocate_initrd()  	 * So here We can do PAGE_ALIGN() safely to get partial page to be freed  	 */ -	free_init_pages("initrd memory", start, PAGE_ALIGN(end)); +	free_init_pages("initrd", start, PAGE_ALIGN(end));  }  #endif + +void __init zone_sizes_init(void) +{ +	unsigned long max_zone_pfns[MAX_NR_ZONES]; + +	memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + +#ifdef CONFIG_ZONE_DMA +	max_zone_pfns[ZONE_DMA]		= MAX_DMA_PFN; +#endif +#ifdef CONFIG_ZONE_DMA32 +	max_zone_pfns[ZONE_DMA32]	= MAX_DMA32_PFN; +#endif +	max_zone_pfns[ZONE_NORMAL]	= max_low_pfn; +#ifdef CONFIG_HIGHMEM +	max_zone_pfns[ZONE_HIGHMEM]	= max_pfn; +#endif + +	free_area_init_nodes(max_zone_pfns); +} +  | 
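The central new helper in this patch is alloc_low_pages(): page-table pages come from a small brk-reserved buffer (pgt_buf_start..pgt_buf_top, set up by early_alloc_pgt_buf()) and, once that buffer is exhausted or the range being mapped overlaps it, from a memblock reservation between min_pfn_mapped and max_pfn_mapped. A minimal standalone C sketch of just that decision is below; it compiles in userspace, and the pfn numbers and the fallback_pfn stand-in are invented for illustration, not taken from the kernel.

#include <stdio.h>
#include <stdbool.h>

static unsigned long pgt_buf_end = 0x100;   /* next free pfn in the brk window */
static unsigned long pgt_buf_top = 0x106;   /* end of the 6-page brk window */
static bool can_use_brk_pgt = true;

static unsigned long alloc_low_pfns(unsigned int num)
{
	if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) {
		/* stand-in for memblock_find_in_range() + memblock_reserve() */
		static unsigned long fallback_pfn = 0x80000;
		unsigned long pfn = fallback_pfn;

		fallback_pfn += num;
		printf("memblock: %u page(s) at pfn %#lx\n", num, pfn);
		return pfn;
	}
	unsigned long pfn = pgt_buf_end;

	pgt_buf_end += num;
	printf("brk:      %u page(s) at pfn %#lx\n", num, pfn);
	return pfn;
}

int main(void)
{
	alloc_low_pfns(4);		/* fits in the brk window */
	alloc_low_pfns(4);		/* no longer fits: memblock fallback */
	can_use_brk_pgt = false;	/* caller says the range overlaps brk */
	alloc_low_pfns(1);
	return 0;
}

The last call falls back even though the brk window still has room, mirroring the can_use_brk_pgt flag that init_range_memory_mapping() clears while mapping a range that overlaps the brk buffer.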
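split_mem_range() carves [start, end) into a 4k head up to the first 2M boundary, a 2M-mapped body up to the first 1G boundary, a 1G-mapped middle, then 2M and 4k tails, before the merge loop collapses neighbours with the same page size. Below is a rough userspace model of the 64-bit arithmetic only; it ignores the 32-bit special cases, the mr[] array and adjust_range_page_size_mask(), and the example addresses are arbitrary.

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_PFNS	(1UL << (21 - PAGE_SHIFT))	/* pfns per 2 MiB page */
#define PUD_PFNS	(1UL << (30 - PAGE_SHIFT))	/* pfns per 1 GiB page */

static unsigned long round_up_to(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

static unsigned long round_down_to(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);
}

static void emit(const char *sz, unsigned long s, unsigned long e)
{
	if (s < e)
		printf("%s pages: pfn [%#09lx, %#09lx)\n", sz, s, e);
}

int main(void)
{
	/* deliberately misaligned physical range, chosen only for the demo */
	unsigned long start = 0x00123000UL, end = 0x87654000UL;
	unsigned long pfn = start >> PAGE_SHIFT;
	unsigned long limit = end >> PAGE_SHIFT;
	unsigned long e;

	/* 4k head, up to the first 2M boundary */
	e = round_up_to(pfn, PMD_PFNS);
	if (e > limit)
		e = limit;
	emit("4k", pfn, e);
	pfn = e;

	/* 2M body, up to the first 1G boundary (capped at a 2M-aligned end) */
	e = round_up_to(pfn, PUD_PFNS);
	if (e > round_down_to(limit, PMD_PFNS))
		e = round_down_to(limit, PMD_PFNS);
	emit("2M", pfn, e);
	if (e > pfn)
		pfn = e;

	/* 1G middle */
	e = round_down_to(limit, PUD_PFNS);
	emit("1G", pfn, e);
	if (e > pfn)
		pfn = e;

	/* 2M tail, then 4k tail */
	e = round_down_to(limit, PMD_PFNS);
	emit("2M", pfn, e);
	if (e > pfn)
		pfn = e;
	emit("4k", pfn, limit);
	return 0;
}

For the example range this prints a 4k head, a 2M run up to the first 1 GiB boundary, a 1G middle, then 2M and 4k tails, which is the same per-range shape the "[mem ...] page 1G/2M/4k" printk in the patch reports.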
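memory_map_top_down() maps RAM from the top downward in windows that start at PMD_SIZE and are widened by get_new_step_size()'s shift-by-5 rule, so the page tables needed for each new window always land in memory that is already mapped. The following is a small standalone model of that walk; the 8 GiB top-of-RAM figure is made up, and the kernel's "only widen after a bigger range was mapped" condition is simplified to unconditional widening.

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* 2 MiB, as on x86 */

/* mirrors get_new_step_size(): widen the window 32x */
static unsigned long get_new_step_size(unsigned long step_size)
{
	return step_size << 5;
}

static unsigned long round_down_ul(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);	/* a is always a power of two here */
}

int main(void)
{
	unsigned long map_start = 1UL << 20;	/* pretend ISA_END_ADDRESS */
	unsigned long real_end  = 8UL << 30;	/* pretend top of RAM: 8 GiB */
	unsigned long step_size = PMD_SIZE;
	unsigned long last_start = real_end, start;

	while (last_start > map_start) {
		if (last_start > step_size) {
			start = round_down_ul(last_start - 1, step_size);
			if (start < map_start)
				start = map_start;
		} else {
			start = map_start;
		}
		/* stand-in for init_range_memory_mapping(start, last_start) */
		printf("map [%#014lx, %#014lx) step %#lx\n",
		       start, last_start, step_size);
		last_start = start;
		/* the kernel widens only after a bigger range got mapped;
		 * widened unconditionally here to keep the model short */
		step_size = get_new_step_size(step_size);
	}
	return 0;
}

Each iteration maps a window just below the previously mapped one, so by the time a 64x wider window is attempted, plenty of directly mapped memory is available for its page tables.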
