diff options
Diffstat (limited to 'mm/memblock.c')
| -rw-r--r-- | mm/memblock.c | 1653 | 
1 files changed, 1201 insertions, 452 deletions
diff --git a/mm/memblock.c b/mm/memblock.c index 400dc62697d..6d2f219a48b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,15 +20,47 @@  #include <linux/seq_file.h>  #include <linux/memblock.h> -struct memblock memblock __initdata_memblock; +#include <asm-generic/sections.h> +#include <linux/io.h> + +#include "internal.h" + +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; +#endif + +struct memblock memblock __initdata_memblock = { +	.memory.regions		= memblock_memory_init_regions, +	.memory.cnt		= 1,	/* empty dummy entry */ +	.memory.max		= INIT_MEMBLOCK_REGIONS, + +	.reserved.regions	= memblock_reserved_init_regions, +	.reserved.cnt		= 1,	/* empty dummy entry */ +	.reserved.max		= INIT_MEMBLOCK_REGIONS, + +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +	.physmem.regions	= memblock_physmem_init_regions, +	.physmem.cnt		= 1,	/* empty dummy entry */ +	.physmem.max		= INIT_PHYSMEM_REGIONS, +#endif + +	.bottom_up		= false, +	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE, +};  int memblock_debug __initdata_memblock; -int memblock_can_resize __initdata_memblock; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +#ifdef CONFIG_MOVABLE_NODE +bool movable_node_enabled __initdata_memblock = false; +#endif +static int memblock_can_resize __initdata_memblock; +static int memblock_memory_in_slab __initdata_memblock = 0; +static int memblock_reserved_in_slab __initdata_memblock = 0;  /* inline so we don't get a warning when pr_debug is compiled out */ -static inline const char *memblock_type_name(struct memblock_type *type) 
+static __init_memblock const char * +memblock_type_name(struct memblock_type *type)  {  	if (type == &memblock.memory)  		return "memory"; @@ -38,49 +70,23 @@ static inline const char *memblock_type_name(struct memblock_type *type)  		return "unknown";  } -/* - * Address comparison utilities - */ - -static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) -{ -	return addr & ~(size - 1); -} - -static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) +/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)  { -	return (addr + (size - 1)) & ~(size - 1); +	return *size = min(*size, (phys_addr_t)ULLONG_MAX - base);  } +/* + * Address comparison utilities + */  static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,  				       phys_addr_t base2, phys_addr_t size2)  {  	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));  } -static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, -			       phys_addr_t base2, phys_addr_t size2) -{ -	if (base2 == base1 + size1) -		return 1; -	else if (base1 == base2 + size2) -		return -1; - -	return 0; -} - -static long __init_memblock memblock_regions_adjacent(struct memblock_type *type, -				 unsigned long r1, unsigned long r2) -{ -	phys_addr_t base1 = type->regions[r1].base; -	phys_addr_t size1 = type->regions[r1].size; -	phys_addr_t base2 = type->regions[r2].base; -	phys_addr_t size2 = type->regions[r2].size; - -	return memblock_addrs_adjacent(base1, size1, base2, size2); -} - -long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_overlaps_region(struct memblock_type *type, +					phys_addr_t base, phys_addr_t size)  {  	unsigned long i; @@ -95,137 +101,232 @@ long __init_memblock 
memblock_overlaps_region(struct memblock_type *type, phys_a  }  /* - * Find, allocate, deallocate or reserve unreserved regions. All allocations - * are top-down. + * __memblock_find_range_bottom_up - find free area utility in bottom-up + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Utility called from memblock_find_in_range_node(), find free area bottom-up. + * + * RETURNS: + * Found address on success, 0 on failure.   */ - -static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, -					  phys_addr_t size, phys_addr_t align) +static phys_addr_t __init_memblock +__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, +				phys_addr_t size, phys_addr_t align, int nid)  { -	phys_addr_t base, res_base; -	long j; - -	/* In case, huge size is requested */ -	if (end < size) -		return MEMBLOCK_ERROR; +	phys_addr_t this_start, this_end, cand; +	u64 i; -	base = memblock_align_down((end - size), align); +	for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) { +		this_start = clamp(this_start, start, end); +		this_end = clamp(this_end, start, end); -	/* Prevent allocations returning 0 as it's also used to -	 * indicate an allocation failure -	 */ -	if (start == 0) -		start = PAGE_SIZE; - -	while (start <= base) { -		j = memblock_overlaps_region(&memblock.reserved, base, size); -		if (j < 0) -			return base; -		res_base = memblock.reserved.regions[j].base; -		if (res_base < size) -			break; -		base = memblock_align_down(res_base - size, align); +		cand = round_up(this_start, align); +		if (cand < this_end && this_end - cand >= size) +			return cand;  	} -	return MEMBLOCK_ERROR; +	return 0;  } -static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, -			phys_addr_t align, phys_addr_t 
start, phys_addr_t end) +/** + * __memblock_find_range_top_down - find free area utility, in top-down + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Utility called from memblock_find_in_range_node(), find free area top-down. + * + * RETURNS: + * Found address on success, 0 on failure. + */ +static phys_addr_t __init_memblock +__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, +			       phys_addr_t size, phys_addr_t align, int nid)  { -	long i; +	phys_addr_t this_start, this_end, cand; +	u64 i; + +	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { +		this_start = clamp(this_start, start, end); +		this_end = clamp(this_end, start, end); + +		if (this_end < size) +			continue; + +		cand = round_down(this_end - size, align); +		if (cand >= this_start) +			return cand; +	} -	BUG_ON(0 == size); +	return 0; +} -	size = memblock_align_up(size, align); +/** + * memblock_find_in_range_node - find free area in given range and node + * @size: size of free area to find + * @align: alignment of free area to find + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Find @size free area aligned to @align in the specified range and node. + * + * When allocation direction is bottom-up, the @start should be greater + * than the end of the kernel image. Otherwise, it will be trimmed. The + * reason is that we want the bottom-up allocation just near the kernel + * image so it is highly likely that the allocated memory and the kernel + * will reside in the same node. + * + * If bottom-up allocation failed, will try to allocate memory top-down. 
+ * + * RETURNS: + * Found address on success, 0 on failure. + */ +phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, +					phys_addr_t align, phys_addr_t start, +					phys_addr_t end, int nid) +{ +	int ret; +	phys_addr_t kernel_end; -	/* Pump up max_addr */ +	/* pump up @end */  	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)  		end = memblock.current_limit; -	/* We do a top-down search, this tends to limit memory -	 * fragmentation by keeping early boot allocs near the -	 * top of memory +	/* avoid allocating the first page */ +	start = max_t(phys_addr_t, start, PAGE_SIZE); +	end = max(start, end); +	kernel_end = __pa_symbol(_end); + +	/* +	 * try bottom-up allocation only when bottom-up mode +	 * is set and @end is above the kernel image.  	 */ -	for (i = memblock.memory.cnt - 1; i >= 0; i--) { -		phys_addr_t memblockbase = memblock.memory.regions[i].base; -		phys_addr_t memblocksize = memblock.memory.regions[i].size; -		phys_addr_t bottom, top, found; +	if (memblock_bottom_up() && end > kernel_end) { +		phys_addr_t bottom_up_start; -		if (memblocksize < size) -			continue; -		if ((memblockbase + memblocksize) <= start) -			break; -		bottom = max(memblockbase, start); -		top = min(memblockbase + memblocksize, end); -		if (bottom >= top) -			continue; -		found = memblock_find_region(bottom, top, size, align); -		if (found != MEMBLOCK_ERROR) -			return found; +		/* make sure we will allocate above the kernel */ +		bottom_up_start = max(start, kernel_end); + +		/* ok, try bottom-up allocation first */ +		ret = __memblock_find_range_bottom_up(bottom_up_start, end, +						      size, align, nid); +		if (ret) +			return ret; + +		/* +		 * we always limit bottom-up allocation above the kernel, +		 * but top-down allocation doesn't have the limit, so +		 * retrying top-down allocation may succeed when bottom-up +		 * allocation failed. 
+		 * +		 * bottom-up allocation is expected to fail very rarely, +		 * so we use WARN_ONCE() here to see the stack trace if +		 * a failure happens. +		 */ +		WARN_ONCE(1, "memblock: bottom-up allocation failed, " +			     "memory hotunplug may be affected\n");  	} -	return MEMBLOCK_ERROR; + +	return __memblock_find_range_top_down(start, end, size, align, nid);  } -/* - * Find a free area with specified alignment in a specific range. +/** + * memblock_find_in_range - find free area in given range + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * + * Find @size free area aligned to @align in the specified range. + * + * RETURNS: + * Found address on success, 0 on failure.   */ -u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, +					phys_addr_t end, phys_addr_t size, +					phys_addr_t align)  { -	return memblock_find_base(size, align, start, end); +	return memblock_find_in_range_node(size, align, start, end, +					    NUMA_NO_NODE);  } -/* - * Free memblock.reserved.regions - */ -int __init_memblock memblock_free_reserved_regions(void) +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)  { -	if (memblock.reserved.regions == memblock_reserved_init_regions) -		return 0; +	type->total_size -= type->regions[r].size; +	memmove(&type->regions[r], &type->regions[r + 1], +		(type->cnt - (r + 1)) * sizeof(type->regions[r])); +	type->cnt--; -	return memblock_free(__pa(memblock.reserved.regions), -		 sizeof(struct memblock_region) * memblock.reserved.max); +	/* Special case for empty arrays */ +	if (type->cnt == 0) { +		WARN_ON(type->total_size != 0); +		type->cnt = 1; +		type->regions[0].base = 0; +		type->regions[0].size = 0; +		type->regions[0].flags = 0; +		
memblock_set_region_node(&type->regions[0], MAX_NUMNODES); +	}  } -/* - * Reserve memblock.reserved.regions - */ -int __init_memblock memblock_reserve_reserved_regions(void) +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + +phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( +					phys_addr_t *addr)  {  	if (memblock.reserved.regions == memblock_reserved_init_regions)  		return 0; -	return memblock_reserve(__pa(memblock.reserved.regions), -		 sizeof(struct memblock_region) * memblock.reserved.max); +	*addr = __pa(memblock.reserved.regions); + +	return PAGE_ALIGN(sizeof(struct memblock_region) * +			  memblock.reserved.max);  } -static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) +phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( +					phys_addr_t *addr)  { -	unsigned long i; +	if (memblock.memory.regions == memblock_memory_init_regions) +		return 0; -	for (i = r; i < type->cnt - 1; i++) { -		type->regions[i].base = type->regions[i + 1].base; -		type->regions[i].size = type->regions[i + 1].size; -	} -	type->cnt--; -} +	*addr = __pa(memblock.memory.regions); -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init_memblock memblock_coalesce_regions(struct memblock_type *type, -		unsigned long r1, unsigned long r2) -{ -	type->regions[r1].size += type->regions[r2].size; -	memblock_remove_region(type, r2); +	return PAGE_ALIGN(sizeof(struct memblock_region) * +			  memblock.memory.max);  } -/* Defined below but needed now */ -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +#endif -static int __init_memblock memblock_double_array(struct memblock_type *type) +/** + * memblock_double_array - double the size of the memblock regions array + * @type: memblock type of the regions array being doubled + * @new_area_start: starting address of memory range to avoid overlap with + * @new_area_size: size of memory range to avoid 
overlap with + * + * Double the size of the @type regions array. If memblock is being used to + * allocate memory for a new reserved regions array and there is a previously + * allocated memory range [@new_area_start,@new_area_start+@new_area_size] + * waiting to be reserved, ensure the memory used by the new array does + * not overlap. + * + * RETURNS: + * 0 on success, -1 on failure. + */ +static int __init_memblock memblock_double_array(struct memblock_type *type, +						phys_addr_t new_area_start, +						phys_addr_t new_area_size)  {  	struct memblock_region *new_array, *old_array; +	phys_addr_t old_alloc_size, new_alloc_size;  	phys_addr_t old_size, new_size, addr;  	int use_slab = slab_is_available(); +	int *in_slab;  	/* We don't allow resizing until we know about the reserved regions  	 * of memory that aren't suitable for allocation @@ -236,36 +337,62 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)  	/* Calculate new doubled size */  	old_size = type->max * sizeof(struct memblock_region);  	new_size = old_size << 1; +	/* +	 * We need to allocate the new array aligned to PAGE_SIZE, +	 *   so we can free it completely later. +	 */ +	old_alloc_size = PAGE_ALIGN(old_size); +	new_alloc_size = PAGE_ALIGN(new_size); + +	/* Retrieve the slab flag */ +	if (type == &memblock.memory) +		in_slab = &memblock_memory_in_slab; +	else +		in_slab = &memblock_reserved_in_slab;  	/* Try to find some space for it.  	 *  	 * WARNING: We assume that either slab_is_available() and we use it or -	 * we use MEMBLOCK for allocations. That means that this is unsafe to use -	 * when bootmem is currently active (unless bootmem itself is implemented -	 * on top of MEMBLOCK which isn't the case yet) +	 * we use MEMBLOCK for allocations. 
That means that this is unsafe to +	 * use when bootmem is currently active (unless bootmem itself is +	 * implemented on top of MEMBLOCK which isn't the case yet)  	 *  	 * This should however not be an issue for now, as we currently only -	 * call into MEMBLOCK while it's still active, or much later when slab is -	 * active for memory hotplug operations +	 * call into MEMBLOCK while it's still active, or much later when slab +	 * is active for memory hotplug operations  	 */  	if (use_slab) {  		new_array = kmalloc(new_size, GFP_KERNEL); -		addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); -	} else -		addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); -	if (addr == MEMBLOCK_ERROR) { +		addr = new_array ? __pa(new_array) : 0; +	} else { +		/* only exclude range when trying to double reserved.regions */ +		if (type != &memblock.reserved) +			new_area_start = new_area_size = 0; + +		addr = memblock_find_in_range(new_area_start + new_area_size, +						memblock.current_limit, +						new_alloc_size, PAGE_SIZE); +		if (!addr && new_area_size) +			addr = memblock_find_in_range(0, +				min(new_area_start, memblock.current_limit), +				new_alloc_size, PAGE_SIZE); + +		new_array = addr ? __va(addr) : NULL; +	} +	if (!addr) {  		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",  		       memblock_type_name(type), type->max, type->max * 2);  		return -1;  	} -	new_array = __va(addr); -	memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]", -		 memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1); +	memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]", +			memblock_type_name(type), type->max * 2, (u64)addr, +			(u64)addr + new_size - 1); -	/* Found space, we now need to move the array over before -	 * we add the reserved region since it may be our reserved -	 * array itself that is full. 
+	/* +	 * Found space, we now need to move the array over before we add the +	 * reserved region since it may be our reserved array itself that is +	 * full.  	 */  	memcpy(new_array, type->regions, old_size);  	memset(new_array + type->max, 0, old_size); @@ -273,208 +400,684 @@ static int __init_memblock memblock_double_array(struct memblock_type *type)  	type->regions = new_array;  	type->max <<= 1; -	/* If we use SLAB that's it, we are done */ -	if (use_slab) -		return 0; - -	/* Add the new reserved region now. Should not fail ! */ -	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0); +	/* Free old array. We needn't free it if the array is the static one */ +	if (*in_slab) +		kfree(old_array); +	else if (old_array != memblock_memory_init_regions && +		 old_array != memblock_reserved_init_regions) +		memblock_free(__pa(old_array), old_alloc_size); -	/* If the array wasn't our static init one, then free it. We only do -	 * that before SLAB is available as later on, we don't know whether -	 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal -	 * anyways +	/* +	 * Reserve the new array if that comes from the memblock.  Otherwise, we +	 * needn't do it  	 */ -	if (old_array != memblock_memory_init_regions && -	    old_array != memblock_reserved_init_regions) -		memblock_free(__pa(old_array), old_size); +	if (!use_slab) +		BUG_ON(memblock_reserve(addr, new_alloc_size)); + +	/* Update slab flag */ +	*in_slab = use_slab;  	return 0;  } -extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, -					  phys_addr_t addr2, phys_addr_t size2) +/** + * memblock_merge_regions - merge neighboring compatible regions + * @type: memblock type to scan + * + * Scan @type and merge neighboring compatible regions. 
+ */ +static void __init_memblock memblock_merge_regions(struct memblock_type *type) +{ +	int i = 0; + +	/* cnt never goes below 1 */ +	while (i < type->cnt - 1) { +		struct memblock_region *this = &type->regions[i]; +		struct memblock_region *next = &type->regions[i + 1]; + +		if (this->base + this->size != next->base || +		    memblock_get_region_node(this) != +		    memblock_get_region_node(next) || +		    this->flags != next->flags) { +			BUG_ON(this->base + this->size > next->base); +			i++; +			continue; +		} + +		this->size += next->size; +		/* move forward from next + 1, index of which is i + 2 */ +		memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); +		type->cnt--; +	} +} + +/** + * memblock_insert_region - insert new memblock region + * @type:	memblock type to insert into + * @idx:	index for the insertion point + * @base:	base address of the new region + * @size:	size of the new region + * @nid:	node id of the new region + * @flags:	flags of the new region + * + * Insert new memblock region [@base,@base+@size) into @type at @idx. + * @type must already have extra room to accommodate the new region. 
+ */ +static void __init_memblock memblock_insert_region(struct memblock_type *type, +						   int idx, phys_addr_t base, +						   phys_addr_t size, +						   int nid, unsigned long flags)  { -	return 1; +	struct memblock_region *rgn = &type->regions[idx]; + +	BUG_ON(type->cnt >= type->max); +	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); +	rgn->base = base; +	rgn->size = size; +	rgn->flags = flags; +	memblock_set_region_node(rgn, nid); +	type->cnt++; +	type->total_size += size;  } -static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +/** + * memblock_add_range - add new memblock region + * @type: memblock type to add new region into + * @base: base address of the new region + * @size: size of the new region + * @nid: nid of the new region + * @flags: flags of the new region + * + * Add new memblock region [@base,@base+@size) into @type.  The new region + * is allowed to overlap with existing ones - overlaps don't affect already + * existing regions.  @type is guaranteed to be minimal (all neighbouring + * compatible regions are merged) after the addition. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_add_range(struct memblock_type *type, +				phys_addr_t base, phys_addr_t size, +				int nid, unsigned long flags)  { -	unsigned long coalesced = 0; -	long adjacent, i; +	bool insert = false; +	phys_addr_t obase = base; +	phys_addr_t end = base + memblock_cap_size(base, &size); +	int i, nr_new; + +	if (!size) +		return 0; -	if ((type->cnt == 1) && (type->regions[0].size == 0)) { +	/* special case for empty array */ +	if (type->regions[0].size == 0) { +		WARN_ON(type->cnt != 1 || type->total_size);  		type->regions[0].base = base;  		type->regions[0].size = size; +		type->regions[0].flags = flags; +		memblock_set_region_node(&type->regions[0], nid); +		type->total_size = size;  		return 0;  	} +repeat: +	/* +	 * The following is executed twice.  
Once with %false @insert and +	 * then with %true.  The first counts the number of regions needed +	 * to accommodate the new area.  The second actually inserts them. +	 */ +	base = obase; +	nr_new = 0; -	/* First try and coalesce this MEMBLOCK with another. */  	for (i = 0; i < type->cnt; i++) { -		phys_addr_t rgnbase = type->regions[i].base; -		phys_addr_t rgnsize = type->regions[i].size; - -		if ((rgnbase == base) && (rgnsize == size)) -			/* Already have this region, so we're done */ -			return 0; +		struct memblock_region *rgn = &type->regions[i]; +		phys_addr_t rbase = rgn->base; +		phys_addr_t rend = rbase + rgn->size; -		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); -		/* Check if arch allows coalescing */ -		if (adjacent != 0 && type == &memblock.memory && -		    !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize)) -			break; -		if (adjacent > 0) { -			type->regions[i].base -= size; -			type->regions[i].size += size; -			coalesced++; -			break; -		} else if (adjacent < 0) { -			type->regions[i].size += size; -			coalesced++; +		if (rbase >= end)  			break; +		if (rend <= base) +			continue; +		/* +		 * @rgn overlaps.  If it separates the lower part of new +		 * area, insert that portion. +		 */ +		if (rbase > base) { +			nr_new++; +			if (insert) +				memblock_insert_region(type, i++, base, +						       rbase - base, nid, +						       flags);  		} +		/* area below @rend is dealt with, forget about it */ +		base = min(rend, end); +	} + +	/* insert the remaining portion */ +	if (base < end) { +		nr_new++; +		if (insert) +			memblock_insert_region(type, i, base, end - base, +					       nid, flags);  	} -	/* If we plugged a hole, we may want to also coalesce with the -	 * next region +	/* +	 * If this was the first round, resize array and repeat for actual +	 * insertions; otherwise, merge and return.  	 
*/ -	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) && -	    ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base, -							     type->regions[i].size, -							     type->regions[i+1].base, -							     type->regions[i+1].size)))) { -		memblock_coalesce_regions(type, i, i+1); -		coalesced++; +	if (!insert) { +		while (type->cnt + nr_new > type->max) +			if (memblock_double_array(type, obase, size) < 0) +				return -ENOMEM; +		insert = true; +		goto repeat; +	} else { +		memblock_merge_regions(type); +		return 0;  	} +} -	if (coalesced) -		return coalesced; +int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, +				       int nid) +{ +	return memblock_add_range(&memblock.memory, base, size, nid, 0); +} -	/* If we are out of space, we fail. It's too late to resize the array -	 * but then this shouldn't have happened in the first place. -	 */ -	if (WARN_ON(type->cnt >= type->max)) -		return -1; +int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +{ +	return memblock_add_range(&memblock.memory, base, size, +				   MAX_NUMNODES, 0); +} -	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ -	for (i = type->cnt - 1; i >= 0; i--) { -		if (base < type->regions[i].base) { -			type->regions[i+1].base = type->regions[i].base; -			type->regions[i+1].size = type->regions[i].size; -		} else { -			type->regions[i+1].base = base; -			type->regions[i+1].size = size; +/** + * memblock_isolate_range - isolate given range into disjoint memblocks + * @type: memblock type to isolate range for + * @base: base of range to isolate + * @size: size of range to isolate + * @start_rgn: out parameter for the start of isolated region + * @end_rgn: out parameter for the end of isolated region + * + * Walk @type and ensure that regions don't cross the boundaries defined by + * [@base,@base+@size).  Crossing regions are split at the boundaries, + * which may create at most two more regions.  
The index of the first + * region inside the range is returned in *@start_rgn and end in *@end_rgn. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int __init_memblock memblock_isolate_range(struct memblock_type *type, +					phys_addr_t base, phys_addr_t size, +					int *start_rgn, int *end_rgn) +{ +	phys_addr_t end = base + memblock_cap_size(base, &size); +	int i; + +	*start_rgn = *end_rgn = 0; + +	if (!size) +		return 0; + +	/* we'll create at most two more regions */ +	while (type->cnt + 2 > type->max) +		if (memblock_double_array(type, base, size) < 0) +			return -ENOMEM; + +	for (i = 0; i < type->cnt; i++) { +		struct memblock_region *rgn = &type->regions[i]; +		phys_addr_t rbase = rgn->base; +		phys_addr_t rend = rbase + rgn->size; + +		if (rbase >= end)  			break; +		if (rend <= base) +			continue; + +		if (rbase < base) { +			/* +			 * @rgn intersects from below.  Split and continue +			 * to process the next region - the new top half. +			 */ +			rgn->base = base; +			rgn->size -= base - rbase; +			type->total_size -= base - rbase; +			memblock_insert_region(type, i, rbase, base - rbase, +					       memblock_get_region_node(rgn), +					       rgn->flags); +		} else if (rend > end) { +			/* +			 * @rgn intersects from above.  Split and redo the +			 * current region - the new bottom half. +			 */ +			rgn->base = end; +			rgn->size -= end - rbase; +			type->total_size -= end - rbase; +			memblock_insert_region(type, i--, rbase, end - rbase, +					       memblock_get_region_node(rgn), +					       rgn->flags); +		} else { +			/* @rgn is fully contained, record it */ +			if (!*end_rgn) +				*start_rgn = i; +			*end_rgn = i + 1;  		}  	} -	if (base < type->regions[0].base) { -		type->regions[0].base = base; -		type->regions[0].size = size; -	} -	type->cnt++; +	return 0; +} -	/* The array is full ? Try to resize it. 
If that fails, we undo -	 * our allocation and return an error -	 */ -	if (type->cnt == type->max && memblock_double_array(type)) { -		type->cnt--; -		return -1; -	} +int __init_memblock memblock_remove_range(struct memblock_type *type, +					  phys_addr_t base, phys_addr_t size) +{ +	int start_rgn, end_rgn; +	int i, ret; + +	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); +	if (ret) +		return ret; +	for (i = end_rgn - 1; i >= start_rgn; i--) +		memblock_remove_region(type, i);  	return 0;  } -long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) +{ +	return memblock_remove_range(&memblock.memory, base, size); +} + + +int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)  { -	return memblock_add_region(&memblock.memory, base, size); +	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n", +		     (unsigned long long)base, +		     (unsigned long long)base + size - 1, +		     (void *)_RET_IP_); +	kmemleak_free_part(__va(base), size); +	return memblock_remove_range(&memblock.reserved, base, size);  } -static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static int __init_memblock memblock_reserve_region(phys_addr_t base, +						   phys_addr_t size, +						   int nid, +						   unsigned long flags)  { -	phys_addr_t rgnbegin, rgnend; -	phys_addr_t end = base + size; -	int i; +	struct memblock_type *_rgn = &memblock.reserved; -	rgnbegin = rgnend = 0; /* supress gcc warnings */ +	memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", +		     (unsigned long long)base, +		     (unsigned long long)base + size - 1, +		     flags, (void *)_RET_IP_); -	/* Find the region where (base, size) belongs to */ -	for (i=0; i < type->cnt; i++) { -		rgnbegin = type->regions[i].base; -		rgnend = rgnbegin + type->regions[i].size; +	return memblock_add_range(_rgn, base, size, nid, 
flags); +} -		if ((rgnbegin <= base) && (end <= rgnend)) -			break; -	} +int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +{ +	return memblock_reserve_region(base, size, MAX_NUMNODES, 0); +} -	/* Didn't find the region */ -	if (i == type->cnt) -		return -1; +/** + * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. + * @base: the base phys addr of the region + * @size: the size of the region + * + * This function isolates region [@base, @base + @size), and marks it with flag + * MEMBLOCK_HOTPLUG. + * + * Return 0 on success, -errno on failure. + */ +int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) +{ +	struct memblock_type *type = &memblock.memory; +	int i, ret, start_rgn, end_rgn; -	/* Check to see if we are removing entire region */ -	if ((rgnbegin == base) && (rgnend == end)) { -		memblock_remove_region(type, i); -		return 0; -	} +	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); +	if (ret) +		return ret; -	/* Check to see if region is matching at the front */ -	if (rgnbegin == base) { -		type->regions[i].base = end; -		type->regions[i].size -= size; -		return 0; -	} +	for (i = start_rgn; i < end_rgn; i++) +		memblock_set_region_flags(&type->regions[i], MEMBLOCK_HOTPLUG); -	/* Check to see if the region is matching at the end */ -	if (rgnend == end) { -		type->regions[i].size -= size; -		return 0; +	memblock_merge_regions(type); +	return 0; +} + +/** + * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * This function isolates region [@base, @base + @size), and clears flag + * MEMBLOCK_HOTPLUG for the isolated regions. + * + * Return 0 on success, -errno on failure. 
+ */ +int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) +{ +	struct memblock_type *type = &memblock.memory; +	int i, ret, start_rgn, end_rgn; + +	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); +	if (ret) +		return ret; + +	for (i = start_rgn; i < end_rgn; i++) +		memblock_clear_region_flags(&type->regions[i], +					    MEMBLOCK_HOTPLUG); + +	memblock_merge_regions(type); +	return 0; +} + +/** + * __next__mem_range - next function for for_each_free_mem_range() etc. + * @idx: pointer to u64 loop variable + * @nid: node selector, %NUMA_NO_NODE for all nodes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Find the first area from *@idx which matches @nid, fill the out + * parameters, and update *@idx for the next iteration.  The lower 32bit of + * *@idx contains index into type_a and the upper 32bit indexes the + * areas before each region in type_b.	For example, if type_b regions + * look like the following, + * + *	0:[0-16), 1:[32-48), 2:[128-130) + * + * The upper 32bit indexes the following regions. + * + *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) + * + * As both region arrays are sorted, the function advances the two indices + * in lockstep and returns each intersection. + */ +void __init_memblock __next_mem_range(u64 *idx, int nid, +				      struct memblock_type *type_a, +				      struct memblock_type *type_b, +				      phys_addr_t *out_start, +				      phys_addr_t *out_end, int *out_nid) +{ +	int idx_a = *idx & 0xffffffff; +	int idx_b = *idx >> 32; + +	if (WARN_ONCE(nid == MAX_NUMNODES, +	"Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) +		nid = NUMA_NO_NODE; + +	for (; idx_a < type_a->cnt; idx_a++) { +		struct memblock_region *m = &type_a->regions[idx_a]; + +		phys_addr_t m_start = m->base; +		phys_addr_t m_end = m->base + m->size; +		int	    m_nid = memblock_get_region_node(m); + +		/* only memory regions are associated with nodes, check it */ +		if (nid != NUMA_NO_NODE && nid != m_nid) +			continue; + +		if (!type_b) { +			if (out_start) +				*out_start = m_start; +			if (out_end) +				*out_end = m_end; +			if (out_nid) +				*out_nid = m_nid; +			idx_a++; +			*idx = (u32)idx_a | (u64)idx_b << 32; +			return; +		} + +		/* scan areas before each reservation */ +		for (; idx_b < type_b->cnt + 1; idx_b++) { +			struct memblock_region *r; +			phys_addr_t r_start; +			phys_addr_t r_end; + +			r = &type_b->regions[idx_b]; +			r_start = idx_b ? r[-1].base + r[-1].size : 0; +			r_end = idx_b < type_b->cnt ? +				r->base : ULLONG_MAX; + +			/* +			 * if idx_b advanced past idx_a, +			 * break out to advance idx_a +			 */ +			if (r_start >= m_end) +				break; +			/* if the two regions intersect, we're done */ +			if (m_start < r_end) { +				if (out_start) +					*out_start = +						max(m_start, r_start); +				if (out_end) +					*out_end = min(m_end, r_end); +				if (out_nid) +					*out_nid = m_nid; +				/* +				 * The region which ends first is +				 * advanced for the next iteration. +				 */ +				if (m_end <= r_end) +					idx_a++; +				else +					idx_b++; +				*idx = (u32)idx_a | (u64)idx_b << 32; +				return; +			} +		}  	} -	/* -	 * We need to split the entry -  adjust the current one to the -	 * beginging of the hole and add the region after hole. 
-	 */ -	type->regions[i].size = base - type->regions[i].base; -	return memblock_add_region(type, end, rgnend - end); +	/* signal end of iteration */ +	*idx = ULLONG_MAX;  } -long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) +/** + * __next_mem_range_rev - generic next function for for_each_*_range_rev() + * + * Finds the next range from type_a which is not marked as unsuitable + * in type_b. + * + * @idx: pointer to u64 loop variable + * @nid: nid: node selector, %NUMA_NO_NODE for all nodes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Reverse of __next_mem_range(). + */ +void __init_memblock __next_mem_range_rev(u64 *idx, int nid, +					  struct memblock_type *type_a, +					  struct memblock_type *type_b, +					  phys_addr_t *out_start, +					  phys_addr_t *out_end, int *out_nid)  { -	return __memblock_remove(&memblock.memory, base, size); +	int idx_a = *idx & 0xffffffff; +	int idx_b = *idx >> 32; + +	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) +		nid = NUMA_NO_NODE; + +	if (*idx == (u64)ULLONG_MAX) { +		idx_a = type_a->cnt - 1; +		idx_b = type_b->cnt; +	} + +	for (; idx_a >= 0; idx_a--) { +		struct memblock_region *m = &type_a->regions[idx_a]; + +		phys_addr_t m_start = m->base; +		phys_addr_t m_end = m->base + m->size; +		int m_nid = memblock_get_region_node(m); + +		/* only memory regions are associated with nodes, check it */ +		if (nid != NUMA_NO_NODE && nid != m_nid) +			continue; + +		/* skip hotpluggable memory regions if needed */ +		if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) +			continue; + +		if (!type_b) { +			if (out_start) +				*out_start = m_start; +			if (out_end) +				*out_end = m_end; +			if (out_nid) +				*out_nid = m_nid; +			idx_a++; +			*idx = (u32)idx_a | (u64)idx_b << 32; +			return; +		} + +		/* scan areas before each reservation */ +		for (; idx_b >= 0; idx_b--) { +			struct memblock_region *r; +			phys_addr_t r_start; +			phys_addr_t r_end; + +			r = &type_b->regions[idx_b]; +			r_start = idx_b ? r[-1].base + r[-1].size : 0; +			r_end = idx_b < type_b->cnt ? +				r->base : ULLONG_MAX; +			/* +			 * if idx_b advanced past idx_a, +			 * break out to advance idx_a +			 */ + +			if (r_end <= m_start) +				break; +			/* if the two regions intersect, we're done */ +			if (m_end > r_start) { +				if (out_start) +					*out_start = max(m_start, r_start); +				if (out_end) +					*out_end = min(m_end, r_end); +				if (out_nid) +					*out_nid = m_nid; +				if (m_start >= r_start) +					idx_a--; +				else +					idx_b--; +				*idx = (u32)idx_a | (u64)idx_b << 32; +				return; +			} +		} +	} +	/* signal end of iteration */ +	*idx = ULLONG_MAX;  } -long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +/* + * Common iterator interface used to define for_each_mem_range(). 
+ */ +void __init_memblock __next_mem_pfn_range(int *idx, int nid, +				unsigned long *out_start_pfn, +				unsigned long *out_end_pfn, int *out_nid)  { -	return __memblock_remove(&memblock.reserved, base, size); +	struct memblock_type *type = &memblock.memory; +	struct memblock_region *r; + +	while (++*idx < type->cnt) { +		r = &type->regions[*idx]; + +		if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) +			continue; +		if (nid == MAX_NUMNODES || nid == r->nid) +			break; +	} +	if (*idx >= type->cnt) { +		*idx = -1; +		return; +	} + +	if (out_start_pfn) +		*out_start_pfn = PFN_UP(r->base); +	if (out_end_pfn) +		*out_end_pfn = PFN_DOWN(r->base + r->size); +	if (out_nid) +		*out_nid = r->nid;  } -long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +/** + * memblock_set_node - set node ID on memblock regions + * @base: base of area to set node ID for + * @size: size of area to set node ID for + * @type: memblock type to set node ID for + * @nid: node ID to set + * + * Set the nid of memblock @type regions in [@base,@base+@size) to @nid. + * Regions which cross the area boundaries are split as necessary. + * + * RETURNS: + * 0 on success, -errno on failure. 
+ */ +int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, +				      struct memblock_type *type, int nid)  { -	struct memblock_type *_rgn = &memblock.reserved; +	int start_rgn, end_rgn; +	int i, ret; + +	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); +	if (ret) +		return ret; -	BUG_ON(0 == size); +	for (i = start_rgn; i < end_rgn; i++) +		memblock_set_region_node(&type->regions[i], nid); -	return memblock_add_region(_rgn, base, size); +	memblock_merge_regions(type); +	return 0;  } +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) +static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, +					phys_addr_t align, phys_addr_t start, +					phys_addr_t end, int nid)  {  	phys_addr_t found; -	/* We align the size to limit fragmentation. Without this, a lot of -	 * small allocs quickly eat up the whole reserve array on sparc -	 */ -	size = memblock_align_up(size, align); +	if (!align) +		align = SMP_CACHE_BYTES; -	found = memblock_find_base(size, align, 0, max_addr); -	if (found != MEMBLOCK_ERROR && -	    memblock_add_region(&memblock.reserved, found, size) >= 0) +	found = memblock_find_in_range_node(size, align, start, end, nid); +	if (found && !memblock_reserve(found, size)) { +		/* +		 * The min_count is set to 0 so that memblock allocations are +		 * never reported as leaks. 
+		 */ +		kmemleak_alloc(__va(found), size, 0, 0);  		return found; - +	}  	return 0;  } +phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, +					phys_addr_t start, phys_addr_t end) +{ +	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE); +} + +static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, +					phys_addr_t align, phys_addr_t max_addr, +					int nid) +{ +	return memblock_alloc_range_nid(size, align, 0, max_addr, nid); +} + +phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) +{ +	return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) +{ +	return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE); +} +  phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)  {  	phys_addr_t alloc; @@ -493,116 +1096,247 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)  	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);  } +phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ +	phys_addr_t res = memblock_alloc_nid(size, align, nid); -/* - * Additional node-local allocators. Search for node memory is bottom up - * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. 
XXX I plan to fix that at some stage +	if (res) +		return res; +	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); +} + +/** + * memblock_virt_alloc_internal - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region to allocate (phys address) + * @max_addr: the upper bound of the memory region to allocate (phys address) + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * The @min_addr limit is dropped if it can not be satisfied and the allocation + * will fall back to memory below @min_addr. Also, allocation may fall back + * to any node in the system if the specified node can not + * hold the requested memory. + * + * The allocation is performed from memory region limited by + * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE. + * + * The memory block is aligned on SMP_CACHE_BYTES if @align == 0. + * + * The phys address of allocated boot memory block is converted to virtual and + * allocated memory is reset to 0.   * - * WARNING: Only available after early_node_map[] has been populated, - * on some architectures, that is after all the calls to add_active_range() - * have been done to populate it. + * In addition, function sets the min_count to 0 using kmemleak_alloc for + * allocated boot memory block, so that it is never reported as leaks. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure.   */ - -phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) +static void * __init memblock_virt_alloc_internal( +				phys_addr_t size, phys_addr_t align, +				phys_addr_t min_addr, phys_addr_t max_addr, +				int nid)  { -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP +	phys_addr_t alloc; +	void *ptr; + +	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. 
Use NUMA_NO_NODE instead\n")) +		nid = NUMA_NO_NODE; +  	/* -	 * This code originates from sparc which really wants use to walk by addresses -	 * and returns the nid. This is not very convenient for early_pfn_map[] users -	 * as the map isn't sorted yet, and it really wants to be walked by nid. -	 * -	 * For now, I implement the inefficient method below which walks the early -	 * map multiple times. Eventually we may want to use an ARCH config option -	 * to implement a completely different method for both case. +	 * Detect any accidental use of these APIs after slab is ready, as at +	 * this moment memblock may be deinitialized already and its +	 * internal data may be destroyed (after execution of free_all_bootmem)  	 */ -	unsigned long start_pfn, end_pfn; -	int i; - -	for (i = 0; i < MAX_NUMNODES; i++) { -		get_pfn_range_for_nid(i, &start_pfn, &end_pfn); -		if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) -			continue; -		*nid = i; -		return min(end, PFN_PHYS(end_pfn)); +	if (WARN_ON_ONCE(slab_is_available())) +		return kzalloc_node(size, GFP_NOWAIT, nid); + +	if (!align) +		align = SMP_CACHE_BYTES; + +	if (max_addr > memblock.current_limit) +		max_addr = memblock.current_limit; + +again: +	alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, +					    nid); +	if (alloc) +		goto done; + +	if (nid != NUMA_NO_NODE) { +		alloc = memblock_find_in_range_node(size, align, min_addr, +						    max_addr,  NUMA_NO_NODE); +		if (alloc) +			goto done;  	} -#endif -	*nid = 0; -	return end; -} +	if (min_addr) { +		min_addr = 0; +		goto again; +	} else { +		goto error; +	} -static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, -					       phys_addr_t size, -					       phys_addr_t align, int nid) -{ -	phys_addr_t start, end; +done: +	memblock_reserve(alloc, size); +	ptr = phys_to_virt(alloc); +	memset(ptr, 0, size); -	start = mp->base; -	end = start + mp->size; +	/* +	 * The min_count is set to 0 so that bootmem 
allocated blocks +	 * are never reported as leaks. This is because many of these blocks +	 * are only referred via the physical address which is not +	 * looked up by kmemleak. +	 */ +	kmemleak_alloc(ptr, size, 0, 0); -	start = memblock_align_up(start, align); -	while (start < end) { -		phys_addr_t this_end; -		int this_nid; +	return ptr; -		this_end = memblock_nid_range(start, end, &this_nid); -		if (this_nid == nid) { -			phys_addr_t ret = memblock_find_region(start, this_end, size, align); -			if (ret != MEMBLOCK_ERROR && -			    memblock_add_region(&memblock.reserved, ret, size) >= 0) -				return ret; -		} -		start = this_end; -	} - -	return MEMBLOCK_ERROR; +error: +	return NULL;  } -phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) +/** + * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + *	  is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to + *	      allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides + * additional debug information (including caller info), if enabled. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure. + */ +void * __init memblock_virt_alloc_try_nid_nopanic( +				phys_addr_t size, phys_addr_t align, +				phys_addr_t min_addr, phys_addr_t max_addr, +				int nid)  { -	struct memblock_type *mem = &memblock.memory; -	int i; - -	BUG_ON(0 == size); - -	/* We align the size to limit fragmentation. 
Without this, a lot of -	 * small allocs quickly eat up the whole reserve array on sparc -	 */ -	size = memblock_align_up(size, align); - -	/* We do a bottom-up search for a region with the right -	 * nid since that's easier considering how memblock_nid_range() -	 * works -	 */ -	for (i = 0; i < mem->cnt; i++) { -		phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], -					       size, align, nid); -		if (ret != MEMBLOCK_ERROR) -			return ret; -	} - -	return 0; +	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", +		     __func__, (u64)size, (u64)align, nid, (u64)min_addr, +		     (u64)max_addr, (void *)_RET_IP_); +	return memblock_virt_alloc_internal(size, align, min_addr, +					     max_addr, nid);  } -phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +/** + * memblock_virt_alloc_try_nid - allocate boot memory block with panicking + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + *	  is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to + *	      allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public panicking version of _memblock_virt_alloc_try_nid_nopanic() + * which provides debug information (including caller info), if enabled, + * and panics if the request can not be satisfied. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure. 
+ */ +void * __init memblock_virt_alloc_try_nid( +			phys_addr_t size, phys_addr_t align, +			phys_addr_t min_addr, phys_addr_t max_addr, +			int nid)  { -	phys_addr_t res = memblock_alloc_nid(size, align, nid); +	void *ptr; + +	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", +		     __func__, (u64)size, (u64)align, nid, (u64)min_addr, +		     (u64)max_addr, (void *)_RET_IP_); +	ptr = memblock_virt_alloc_internal(size, align, +					   min_addr, max_addr, nid); +	if (ptr) +		return ptr; + +	panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n", +	      __func__, (u64)size, (u64)align, nid, (u64)min_addr, +	      (u64)max_addr); +	return NULL; +} -	if (res) -		return res; -	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); +/** + * __memblock_free_early - free boot memory block + * @base: phys starting address of the  boot memory block + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_virt_alloc_xx() API. + * The freeing memory will not be released to the buddy allocator. + */ +void __init __memblock_free_early(phys_addr_t base, phys_addr_t size) +{ +	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", +		     __func__, (u64)base, (u64)base + size - 1, +		     (void *)_RET_IP_); +	kmemleak_free_part(__va(base), size); +	memblock_remove_range(&memblock.reserved, base, size);  } +/* + * __memblock_free_late - free bootmem block pages directly to buddy allocator + * @addr: phys starting address of the  boot memory block + * @size: size of the boot memory block in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system.  Pages are released directly + * to the buddy allocator, no bootmem metadata is updated because it is gone. 
+ */ +void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) +{ +	u64 cursor, end; + +	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", +		     __func__, (u64)base, (u64)base + size - 1, +		     (void *)_RET_IP_); +	kmemleak_free_part(__va(base), size); +	cursor = PFN_UP(base); +	end = PFN_DOWN(base + size); + +	for (; cursor < end; cursor++) { +		__free_pages_bootmem(pfn_to_page(cursor), 0); +		totalram_pages++; +	} +}  /*   * Remaining API functions   */ -/* You must call memblock_analyze() before this. */  phys_addr_t __init memblock_phys_mem_size(void)  { -	return memblock.memory_size; +	return memblock.memory.total_size; +} + +phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) +{ +	unsigned long pages = 0; +	struct memblock_region *r; +	unsigned long start_pfn, end_pfn; + +	for_each_memblock(memory, r) { +		start_pfn = memblock_region_memory_base_pfn(r); +		end_pfn = memblock_region_memory_end_pfn(r); +		start_pfn = min_t(unsigned long, start_pfn, limit_pfn); +		end_pfn = min_t(unsigned long, end_pfn, limit_pfn); +		pages += end_pfn - start_pfn; +	} + +	return PFN_PHYS(pages); +} + +/* lowest address */ +phys_addr_t __init_memblock memblock_start_of_DRAM(void) +{ +	return memblock.memory.regions[0].base;  }  phys_addr_t __init_memblock memblock_end_of_DRAM(void) @@ -612,45 +1346,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)  	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);  } -/* You must call memblock_analyze() after this. */ -void __init memblock_enforce_memory_limit(phys_addr_t memory_limit) +void __init memblock_enforce_memory_limit(phys_addr_t limit)  { -	unsigned long i; -	phys_addr_t limit; -	struct memblock_region *p; +	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; +	struct memblock_region *r; -	if (!memory_limit) +	if (!limit)  		return; -	/* Truncate the memblock regions to satisfy the memory limit. 
*/ -	limit = memory_limit; -	for (i = 0; i < memblock.memory.cnt; i++) { -		if (limit > memblock.memory.regions[i].size) { -			limit -= memblock.memory.regions[i].size; -			continue; +	/* find out max address */ +	for_each_memblock(memory, r) { +		if (limit <= r->size) { +			max_addr = r->base + limit; +			break;  		} - -		memblock.memory.regions[i].size = limit; -		memblock.memory.cnt = i + 1; -		break; +		limit -= r->size;  	} -	memory_limit = memblock_end_of_DRAM(); - -	/* And truncate any reserves above the limit also. */ -	for (i = 0; i < memblock.reserved.cnt; i++) { -		p = &memblock.reserved.regions[i]; - -		if (p->base > memory_limit) -			p->size = 0; -		else if ((p->base + p->size) > memory_limit) -			p->size = memory_limit - p->base; - -		if (p->size == 0) { -			memblock_remove_region(&memblock.reserved, i); -			i--; -		} -	} +	/* truncate both memory and reserved regions */ +	memblock_remove_range(&memblock.memory, max_addr, +			      (phys_addr_t)ULLONG_MAX); +	memblock_remove_range(&memblock.reserved, max_addr, +			      (phys_addr_t)ULLONG_MAX);  }  static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) @@ -681,108 +1398,137 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)  	return memblock_search(&memblock.memory, addr) != -1;  } +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +int __init_memblock memblock_search_pfn_nid(unsigned long pfn, +			 unsigned long *start_pfn, unsigned long *end_pfn) +{ +	struct memblock_type *type = &memblock.memory; +	int mid = memblock_search(type, PFN_PHYS(pfn)); + +	if (mid == -1) +		return -1; + +	*start_pfn = PFN_DOWN(type->regions[mid].base); +	*end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); + +	return type->regions[mid].nid; +} +#endif + +/** + * memblock_is_region_memory - check if a region is a subset of memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) is a subset of a memory 
block. + * + * RETURNS: + * 0 if false, non-zero if true + */  int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)  { -	int idx = memblock_search(&memblock.reserved, base); +	int idx = memblock_search(&memblock.memory, base); +	phys_addr_t end = base + memblock_cap_size(base, &size);  	if (idx == -1)  		return 0; -	return memblock.reserved.regions[idx].base <= base && -		(memblock.reserved.regions[idx].base + -		 memblock.reserved.regions[idx].size) >= (base + size); +	return memblock.memory.regions[idx].base <= base && +		(memblock.memory.regions[idx].base + +		 memblock.memory.regions[idx].size) >= end;  } +/** + * memblock_is_region_reserved - check if a region intersects reserved memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) intersects a reserved memory block. + * + * RETURNS: + * 0 if false, non-zero if true + */  int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)  { +	memblock_cap_size(base, &size);  	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;  } +void __init_memblock memblock_trim_memory(phys_addr_t align) +{ +	phys_addr_t start, end, orig_start, orig_end; +	struct memblock_region *r; + +	for_each_memblock(memory, r) { +		orig_start = r->base; +		orig_end = r->base + r->size; +		start = round_up(orig_start, align); +		end = round_down(orig_end, align); + +		if (start == orig_start && end == orig_end) +			continue; + +		if (start < end) { +			r->base = start; +			r->size = end - start; +		} else { +			memblock_remove_region(&memblock.memory, +					       r - memblock.memory.regions); +			r--; +		} +	} +}  void __init_memblock memblock_set_current_limit(phys_addr_t limit)  {  	memblock.current_limit = limit;  } -static void __init_memblock memblock_dump(struct memblock_type *region, char *name) +phys_addr_t __init_memblock memblock_get_current_limit(void) +{ +	return memblock.current_limit; +} 
+ +static void __init_memblock memblock_dump(struct memblock_type *type, char *name)  {  	unsigned long long base, size; +	unsigned long flags;  	int i; -	pr_info(" %s.cnt  = 0x%lx\n", name, region->cnt); - -	for (i = 0; i < region->cnt; i++) { -		base = region->regions[i].base; -		size = region->regions[i].size; +	pr_info(" %s.cnt  = 0x%lx\n", name, type->cnt); -		pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", -		    name, i, base, base + size - 1, size); +	for (i = 0; i < type->cnt; i++) { +		struct memblock_region *rgn = &type->regions[i]; +		char nid_buf[32] = ""; + +		base = rgn->base; +		size = rgn->size; +		flags = rgn->flags; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +		if (memblock_get_region_node(rgn) != MAX_NUMNODES) +			snprintf(nid_buf, sizeof(nid_buf), " on node %d", +				 memblock_get_region_node(rgn)); +#endif +		pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n", +			name, i, base, base + size - 1, size, nid_buf, flags);  	}  } -void __init_memblock memblock_dump_all(void) +void __init_memblock __memblock_dump_all(void)  { -	if (!memblock_debug) -		return; -  	pr_info("MEMBLOCK configuration:\n"); -	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); +	pr_info(" memory size = %#llx reserved size = %#llx\n", +		(unsigned long long)memblock.memory.total_size, +		(unsigned long long)memblock.reserved.total_size);  	memblock_dump(&memblock.memory, "memory");  	memblock_dump(&memblock.reserved, "reserved");  } -void __init memblock_analyze(void) +void __init memblock_allow_resize(void)  { -	int i; - -	/* Check marker in the unused last array entry */ -	WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base -		!= (phys_addr_t)RED_INACTIVE); -	WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base -		!= (phys_addr_t)RED_INACTIVE); - -	memblock.memory_size = 0; - -	for (i = 0; i < memblock.memory.cnt; i++) -		memblock.memory_size += memblock.memory.regions[i].size; - -	/* We allow 
resizing from there */  	memblock_can_resize = 1;  } -void __init memblock_init(void) -{ -	static int init_done __initdata = 0; - -	if (init_done) -		return; -	init_done = 1; - -	/* Hookup the initial arrays */ -	memblock.memory.regions	= memblock_memory_init_regions; -	memblock.memory.max		= INIT_MEMBLOCK_REGIONS; -	memblock.reserved.regions	= memblock_reserved_init_regions; -	memblock.reserved.max	= INIT_MEMBLOCK_REGIONS; - -	/* Write a marker in the unused last array entry */ -	memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; -	memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; - -	/* Create a dummy zero size MEMBLOCK which will get coalesced away later. -	 * This simplifies the memblock_add() code below... -	 */ -	memblock.memory.regions[0].base = 0; -	memblock.memory.regions[0].size = 0; -	memblock.memory.cnt = 1; - -	/* Ditto. */ -	memblock.reserved.regions[0].base = 0; -	memblock.reserved.regions[0].size = 0; -	memblock.reserved.cnt = 1; - -	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; -} -  static int __init early_memblock(char *p)  {  	if (p && strstr(p, "debug")) @@ -791,7 +1537,7 @@ static int __init early_memblock(char *p)  }  early_param("memblock", early_memblock); -#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK)  static int memblock_debug_show(struct seq_file *m, void *private)  { @@ -834,6 +1580,9 @@ static int __init memblock_init_debugfs(void)  		return -ENXIO;  	debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);  	debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +	debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops); +#endif  	return 0;  }  | 
