diff options
Diffstat (limited to 'mm/memblock.c')
| -rw-r--r-- | mm/memblock.c | 1645 |
1 files changed, 1198 insertions, 447 deletions
diff --git a/mm/memblock.c b/mm/memblock.c index 4618fda975a..6d2f219a48b 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,15 +20,47 @@ #include <linux/seq_file.h> #include <linux/memblock.h> -struct memblock memblock __initdata_memblock; +#include <asm-generic/sections.h> +#include <linux/io.h> + +#include "internal.h" + +static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock; +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP +static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock; +#endif + +struct memblock memblock __initdata_memblock = { + .memory.regions = memblock_memory_init_regions, + .memory.cnt = 1, /* empty dummy entry */ + .memory.max = INIT_MEMBLOCK_REGIONS, + + .reserved.regions = memblock_reserved_init_regions, + .reserved.cnt = 1, /* empty dummy entry */ + .reserved.max = INIT_MEMBLOCK_REGIONS, + +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP + .physmem.regions = memblock_physmem_init_regions, + .physmem.cnt = 1, /* empty dummy entry */ + .physmem.max = INIT_PHYSMEM_REGIONS, +#endif + + .bottom_up = false, + .current_limit = MEMBLOCK_ALLOC_ANYWHERE, +}; int memblock_debug __initdata_memblock; -int memblock_can_resize __initdata_memblock; -static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; -static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1] __initdata_memblock; +#ifdef CONFIG_MOVABLE_NODE +bool movable_node_enabled __initdata_memblock = false; +#endif +static int memblock_can_resize __initdata_memblock; +static int memblock_memory_in_slab __initdata_memblock = 0; +static int memblock_reserved_in_slab __initdata_memblock = 0; /* inline so we don't get a warning when pr_debug is compiled out */ -static inline const char *memblock_type_name(struct memblock_type *type) +static __init_memblock const char * +memblock_type_name(struct memblock_type *type) { if (type == &memblock.memory) return "memory"; @@ -38,49 +70,23 @@ static inline const char *memblock_type_name(struct memblock_type *type) return "unknown"; } -/* - * Address comparison utilities - */ - -static phys_addr_t __init_memblock memblock_align_down(phys_addr_t addr, phys_addr_t size) -{ - return addr & ~(size - 1); -} - -static phys_addr_t __init_memblock memblock_align_up(phys_addr_t addr, phys_addr_t size) +/* adjust *@size so that (@base + *@size) doesn't overflow, return new size */ +static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size) { - return (addr + (size - 1)) & ~(size - 1); + return *size = min(*size, (phys_addr_t)ULLONG_MAX - base); } +/* + * Address comparison utilities + */ static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2) { return ((base1 < (base2 + size2)) && (base2 < (base1 + size1))); } -static long __init_memblock memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1, - phys_addr_t base2, phys_addr_t size2) -{ - if (base2 == base1 + size1) - return 1; - else if (base1 == base2 + size2) - return -1; - - return 0; -} - -static long __init_memblock memblock_regions_adjacent(struct memblock_type *type, - unsigned long r1, unsigned long r2) -{ - phys_addr_t base1 = type->regions[r1].base; - phys_addr_t size1 = type->regions[r1].size; - phys_addr_t base2 = type->regions[r2].base; - phys_addr_t size2 = type->regions[r2].size; - - return memblock_addrs_adjacent(base1, size1, base2, size2); -} - -long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static long __init_memblock memblock_overlaps_region(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) { unsigned long i; @@ -95,135 +101,232 @@ long __init_memblock memblock_overlaps_region(struct memblock_type *type, phys_a } /* - * Find, allocate, deallocate or reserve unreserved regions. All allocations - * are top-down. + * __memblock_find_range_bottom_up - find free area utility in bottom-up + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Utility called from memblock_find_in_range_node(), find free area bottom-up. + * + * RETURNS: + * Found address on success, 0 on failure. */ +static phys_addr_t __init_memblock +__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid) +{ + phys_addr_t this_start, this_end, cand; + u64 i; + + for_each_free_mem_range(i, nid, &this_start, &this_end, NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); + + cand = round_up(this_start, align); + if (cand < this_end && this_end - cand >= size) + return cand; + } -static phys_addr_t __init_memblock memblock_find_region(phys_addr_t start, phys_addr_t end, - phys_addr_t size, phys_addr_t align) + return 0; +} + +/** + * __memblock_find_range_top_down - find free area utility, in top-down + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Utility called from memblock_find_in_range_node(), find free area top-down. + * + * RETURNS: + * Found address on success, 0 on failure. + */ +static phys_addr_t __init_memblock +__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, + phys_addr_t size, phys_addr_t align, int nid) { - phys_addr_t base, res_base; - long j; + phys_addr_t this_start, this_end, cand; + u64 i; - /* In case, huge size is requested */ - if (end < size) - return MEMBLOCK_ERROR; + for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) { + this_start = clamp(this_start, start, end); + this_end = clamp(this_end, start, end); - base = memblock_align_down((end - size), align); + if (this_end < size) + continue; - /* Prevent allocations returning 0 as it's also used to - * indicate an allocation failure - */ - if (start == 0) - start = PAGE_SIZE; - - while (start <= base) { - j = memblock_overlaps_region(&memblock.reserved, base, size); - if (j < 0) - return base; - res_base = memblock.reserved.regions[j].base; - if (res_base < size) - break; - base = memblock_align_down(res_base - size, align); + cand = round_down(this_end - size, align); + if (cand >= this_start) + return cand; } - return MEMBLOCK_ERROR; + return 0; } -static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, - phys_addr_t align, phys_addr_t start, phys_addr_t end) +/** + * memblock_find_in_range_node - find free area in given range and node + * @size: size of free area to find + * @align: alignment of free area to find + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Find @size free area aligned to @align in the specified range and node. + * + * When allocation direction is bottom-up, the @start should be greater + * than the end of the kernel image. Otherwise, it will be trimmed. The + * reason is that we want the bottom-up allocation just near the kernel + * image so it is highly likely that the allocated memory and the kernel + * will reside in the same node. + * + * If bottom-up allocation failed, will try to allocate memory top-down. + * + * RETURNS: + * Found address on success, 0 on failure. + */ +phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid) { - long i; - - BUG_ON(0 == size); + int ret; + phys_addr_t kernel_end; - /* Pump up max_addr */ + /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE) end = memblock.current_limit; - /* We do a top-down search, this tends to limit memory - * fragmentation by keeping early boot allocs near the - * top of memory + /* avoid allocating the first page */ + start = max_t(phys_addr_t, start, PAGE_SIZE); + end = max(start, end); + kernel_end = __pa_symbol(_end); + + /* + * try bottom-up allocation only when bottom-up mode + * is set and @end is above the kernel image. */ - for (i = memblock.memory.cnt - 1; i >= 0; i--) { - phys_addr_t memblockbase = memblock.memory.regions[i].base; - phys_addr_t memblocksize = memblock.memory.regions[i].size; - phys_addr_t bottom, top, found; + if (memblock_bottom_up() && end > kernel_end) { + phys_addr_t bottom_up_start; - if (memblocksize < size) - continue; - if ((memblockbase + memblocksize) <= start) - break; - bottom = max(memblockbase, start); - top = min(memblockbase + memblocksize, end); - if (bottom >= top) - continue; - found = memblock_find_region(bottom, top, size, align); - if (found != MEMBLOCK_ERROR) - return found; + /* make sure we will allocate above the kernel */ + bottom_up_start = max(start, kernel_end); + + /* ok, try bottom-up allocation first */ + ret = __memblock_find_range_bottom_up(bottom_up_start, end, + size, align, nid); + if (ret) + return ret; + + /* + * we always limit bottom-up allocation above the kernel, + * but top-down allocation doesn't have the limit, so + * retrying top-down allocation may succeed when bottom-up + * allocation failed. + * + * bottom-up allocation is expected to be fail very rarely, + * so we use WARN_ONCE() here to see the stack trace if + * fail happens. + */ + WARN_ONCE(1, "memblock: bottom-up allocation failed, " + "memory hotunplug may be affected\n"); } - return MEMBLOCK_ERROR; + + return __memblock_find_range_top_down(start, end, size, align, nid); } -/* - * Find a free area with specified alignment in a specific range. +/** + * memblock_find_in_range - find free area in given range + * @start: start of candidate range + * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE} + * @size: size of free area to find + * @align: alignment of free area to find + * + * Find @size free area aligned to @align in the specified range. + * + * RETURNS: + * Found address on success, 0 on failure. */ -u64 __init_memblock memblock_find_in_range(u64 start, u64 end, u64 size, u64 align) +phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, + phys_addr_t end, phys_addr_t size, + phys_addr_t align) { - return memblock_find_base(size, align, start, end); + return memblock_find_in_range_node(size, align, start, end, + NUMA_NO_NODE); } -/* - * Free memblock.reserved.regions - */ -int __init_memblock memblock_free_reserved_regions(void) +static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; + type->total_size -= type->regions[r].size; + memmove(&type->regions[r], &type->regions[r + 1], + (type->cnt - (r + 1)) * sizeof(type->regions[r])); + type->cnt--; - return memblock_free(__pa(memblock.reserved.regions), - sizeof(struct memblock_region) * memblock.reserved.max); + /* Special case for empty arrays */ + if (type->cnt == 0) { + WARN_ON(type->total_size != 0); + type->cnt = 1; + type->regions[0].base = 0; + type->regions[0].size = 0; + type->regions[0].flags = 0; + memblock_set_region_node(&type->regions[0], MAX_NUMNODES); + } } -/* - * Reserve memblock.reserved.regions - */ -int __init_memblock memblock_reserve_reserved_regions(void) +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + +phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( + phys_addr_t *addr) { if (memblock.reserved.regions == memblock_reserved_init_regions) return 0; - return memblock_reserve(__pa(memblock.reserved.regions), - sizeof(struct memblock_region) * memblock.reserved.max); + *addr = __pa(memblock.reserved.regions); + + return PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); } -static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) +phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( + phys_addr_t *addr) { - unsigned long i; + if (memblock.memory.regions == memblock_memory_init_regions) + return 0; - for (i = r; i < type->cnt - 1; i++) { - type->regions[i].base = type->regions[i + 1].base; - type->regions[i].size = type->regions[i + 1].size; - } - type->cnt--; -} + *addr = __pa(memblock.memory.regions); -/* Assumption: base addr of region 1 < base addr of region 2 */ -static void __init_memblock memblock_coalesce_regions(struct memblock_type *type, - unsigned long r1, unsigned long r2) -{ - type->regions[r1].size += type->regions[r2].size; - memblock_remove_region(type, r2); + return PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); } -/* Defined below but needed now */ -static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +#endif -static int __init_memblock memblock_double_array(struct memblock_type *type) +/** + * memblock_double_array - double the size of the memblock regions array + * @type: memblock type of the regions array being doubled + * @new_area_start: starting address of memory range to avoid overlap with + * @new_area_size: size of memory range to avoid overlap with + * + * Double the size of the @type regions array. If memblock is being used to + * allocate memory for a new reserved regions array and there is a previously + * allocated memory range [@new_area_start,@new_area_start+@new_area_size] + * waiting to be reserved, ensure the memory used by the new array does + * not overlap. + * + * RETURNS: + * 0 on success, -1 on failure. + */ +static int __init_memblock memblock_double_array(struct memblock_type *type, + phys_addr_t new_area_start, + phys_addr_t new_area_size) { struct memblock_region *new_array, *old_array; + phys_addr_t old_alloc_size, new_alloc_size; phys_addr_t old_size, new_size, addr; int use_slab = slab_is_available(); + int *in_slab; /* We don't allow resizing until we know about the reserved regions * of memory that aren't suitable for allocation @@ -234,36 +337,62 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) /* Calculate new doubled size */ old_size = type->max * sizeof(struct memblock_region); new_size = old_size << 1; + /* + * We need to allocated new one align to PAGE_SIZE, + * so we can free them completely later. + */ + old_alloc_size = PAGE_ALIGN(old_size); + new_alloc_size = PAGE_ALIGN(new_size); + + /* Retrieve the slab flag */ + if (type == &memblock.memory) + in_slab = &memblock_memory_in_slab; + else + in_slab = &memblock_reserved_in_slab; /* Try to find some space for it. * * WARNING: We assume that either slab_is_available() and we use it or - * we use MEMBLOCK for allocations. That means that this is unsafe to use - * when bootmem is currently active (unless bootmem itself is implemented - * on top of MEMBLOCK which isn't the case yet) + * we use MEMBLOCK for allocations. That means that this is unsafe to + * use when bootmem is currently active (unless bootmem itself is + * implemented on top of MEMBLOCK which isn't the case yet) * * This should however not be an issue for now, as we currently only - * call into MEMBLOCK while it's still active, or much later when slab is - * active for memory hotplug operations + * call into MEMBLOCK while it's still active, or much later when slab + * is active for memory hotplug operations */ if (use_slab) { new_array = kmalloc(new_size, GFP_KERNEL); - addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array); - } else - addr = memblock_find_base(new_size, sizeof(phys_addr_t), 0, MEMBLOCK_ALLOC_ACCESSIBLE); - if (addr == MEMBLOCK_ERROR) { + addr = new_array ? __pa(new_array) : 0; + } else { + /* only exclude range when trying to double reserved.regions */ + if (type != &memblock.reserved) + new_area_start = new_area_size = 0; + + addr = memblock_find_in_range(new_area_start + new_area_size, + memblock.current_limit, + new_alloc_size, PAGE_SIZE); + if (!addr && new_area_size) + addr = memblock_find_in_range(0, + min(new_area_start, memblock.current_limit), + new_alloc_size, PAGE_SIZE); + + new_array = addr ? __va(addr) : NULL; + } + if (!addr) { pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n", memblock_type_name(type), type->max, type->max * 2); return -1; } - new_array = __va(addr); - memblock_dbg("memblock: %s array is doubled to %ld at [%#010llx-%#010llx]", - memblock_type_name(type), type->max * 2, (u64)addr, (u64)addr + new_size - 1); + memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]", + memblock_type_name(type), type->max * 2, (u64)addr, + (u64)addr + new_size - 1); - /* Found space, we now need to move the array over before - * we add the reserved region since it may be our reserved - * array itself that is full. + /* + * Found space, we now need to move the array over before we add the + * reserved region since it may be our reserved array itself that is + * full. */ memcpy(new_array, type->regions, old_size); memset(new_array + type->max, 0, old_size); @@ -271,208 +400,684 @@ static int __init_memblock memblock_double_array(struct memblock_type *type) type->regions = new_array; type->max <<= 1; - /* If we use SLAB that's it, we are done */ - if (use_slab) - return 0; - - /* Add the new reserved region now. Should not fail ! */ - BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0); + /* Free old array. We needn't free it if the array is the static one */ + if (*in_slab) + kfree(old_array); + else if (old_array != memblock_memory_init_regions && + old_array != memblock_reserved_init_regions) + memblock_free(__pa(old_array), old_alloc_size); - /* If the array wasn't our static init one, then free it. We only do - * that before SLAB is available as later on, we don't know whether - * to use kfree or free_bootmem_pages(). Shouldn't be a big deal - * anyways + /* + * Reserve the new array if that comes from the memblock. Otherwise, we + * needn't do it */ - if (old_array != memblock_memory_init_regions && - old_array != memblock_reserved_init_regions) - memblock_free(__pa(old_array), old_size); + if (!use_slab) + BUG_ON(memblock_reserve(addr, new_alloc_size)); + + /* Update slab flag */ + *in_slab = use_slab; return 0; } -extern int __init_memblock __weak memblock_memory_can_coalesce(phys_addr_t addr1, phys_addr_t size1, - phys_addr_t addr2, phys_addr_t size2) +/** + * memblock_merge_regions - merge neighboring compatible regions + * @type: memblock type to scan + * + * Scan @type and merge neighboring compatible regions. + */ +static void __init_memblock memblock_merge_regions(struct memblock_type *type) { - return 1; + int i = 0; + + /* cnt never goes below 1 */ + while (i < type->cnt - 1) { + struct memblock_region *this = &type->regions[i]; + struct memblock_region *next = &type->regions[i + 1]; + + if (this->base + this->size != next->base || + memblock_get_region_node(this) != + memblock_get_region_node(next) || + this->flags != next->flags) { + BUG_ON(this->base + this->size > next->base); + i++; + continue; + } + + this->size += next->size; + /* move forward from next + 1, index of which is i + 2 */ + memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next)); + type->cnt--; + } } -static long __init_memblock memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +/** + * memblock_insert_region - insert new memblock region + * @type: memblock type to insert into + * @idx: index for the insertion point + * @base: base address of the new region + * @size: size of the new region + * @nid: node id of the new region + * @flags: flags of the new region + * + * Insert new memblock region [@base,@base+@size) into @type at @idx. + * @type must already have extra room to accomodate the new region. + */ +static void __init_memblock memblock_insert_region(struct memblock_type *type, + int idx, phys_addr_t base, + phys_addr_t size, + int nid, unsigned long flags) +{ + struct memblock_region *rgn = &type->regions[idx]; + + BUG_ON(type->cnt >= type->max); + memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn)); + rgn->base = base; + rgn->size = size; + rgn->flags = flags; + memblock_set_region_node(rgn, nid); + type->cnt++; + type->total_size += size; +} + +/** + * memblock_add_range - add new memblock region + * @type: memblock type to add new region into + * @base: base address of the new region + * @size: size of the new region + * @nid: nid of the new region + * @flags: flags of the new region + * + * Add new memblock region [@base,@base+@size) into @type. The new region + * is allowed to overlap with existing ones - overlaps don't affect already + * existing regions. @type is guaranteed to be minimal (all neighbouring + * compatible regions are merged) after the addition. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_add_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size, + int nid, unsigned long flags) { - unsigned long coalesced = 0; - long adjacent, i; + bool insert = false; + phys_addr_t obase = base; + phys_addr_t end = base + memblock_cap_size(base, &size); + int i, nr_new; - if ((type->cnt == 1) && (type->regions[0].size == 0)) { + if (!size) + return 0; + + /* special case for empty array */ + if (type->regions[0].size == 0) { + WARN_ON(type->cnt != 1 || type->total_size); type->regions[0].base = base; type->regions[0].size = size; + type->regions[0].flags = flags; + memblock_set_region_node(&type->regions[0], nid); + type->total_size = size; return 0; } +repeat: + /* + * The following is executed twice. Once with %false @insert and + * then with %true. The first counts the number of regions needed + * to accomodate the new area. The second actually inserts them. + */ + base = obase; + nr_new = 0; - /* First try and coalesce this MEMBLOCK with another. */ for (i = 0; i < type->cnt; i++) { - phys_addr_t rgnbase = type->regions[i].base; - phys_addr_t rgnsize = type->regions[i].size; + struct memblock_region *rgn = &type->regions[i]; + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; - if ((rgnbase == base) && (rgnsize == size)) - /* Already have this region, so we're done */ - return 0; - - adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize); - /* Check if arch allows coalescing */ - if (adjacent != 0 && type == &memblock.memory && - !memblock_memory_can_coalesce(base, size, rgnbase, rgnsize)) - break; - if (adjacent > 0) { - type->regions[i].base -= size; - type->regions[i].size += size; - coalesced++; - break; - } else if (adjacent < 0) { - type->regions[i].size += size; - coalesced++; + if (rbase >= end) break; + if (rend <= base) + continue; + /* + * @rgn overlaps. If it separates the lower part of new + * area, insert that portion. + */ + if (rbase > base) { + nr_new++; + if (insert) + memblock_insert_region(type, i++, base, + rbase - base, nid, + flags); } + /* area below @rend is dealt with, forget about it */ + base = min(rend, end); + } + + /* insert the remaining portion */ + if (base < end) { + nr_new++; + if (insert) + memblock_insert_region(type, i, base, end - base, + nid, flags); } - /* If we plugged a hole, we may want to also coalesce with the - * next region + /* + * If this was the first round, resize array and repeat for actual + * insertions; otherwise, merge and return. */ - if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1) && - ((type != &memblock.memory || memblock_memory_can_coalesce(type->regions[i].base, - type->regions[i].size, - type->regions[i+1].base, - type->regions[i+1].size)))) { - memblock_coalesce_regions(type, i, i+1); - coalesced++; + if (!insert) { + while (type->cnt + nr_new > type->max) + if (memblock_double_array(type, obase, size) < 0) + return -ENOMEM; + insert = true; + goto repeat; + } else { + memblock_merge_regions(type); + return 0; } +} - if (coalesced) - return coalesced; +int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size, + int nid) +{ + return memblock_add_range(&memblock.memory, base, size, nid, 0); +} - /* If we are out of space, we fail. It's too late to resize the array - * but then this shouldn't have happened in the first place. - */ - if (WARN_ON(type->cnt >= type->max)) - return -1; +int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +{ + return memblock_add_range(&memblock.memory, base, size, + MAX_NUMNODES, 0); +} - /* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */ - for (i = type->cnt - 1; i >= 0; i--) { - if (base < type->regions[i].base) { - type->regions[i+1].base = type->regions[i].base; - type->regions[i+1].size = type->regions[i].size; - } else { - type->regions[i+1].base = base; - type->regions[i+1].size = size; +/** + * memblock_isolate_range - isolate given range into disjoint memblocks + * @type: memblock type to isolate range for + * @base: base of range to isolate + * @size: size of range to isolate + * @start_rgn: out parameter for the start of isolated region + * @end_rgn: out parameter for the end of isolated region + * + * Walk @type and ensure that regions don't cross the boundaries defined by + * [@base,@base+@size). Crossing regions are split at the boundaries, + * which may create at most two more regions. The index of the first + * region inside the range is returned in *@start_rgn and end in *@end_rgn. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int __init_memblock memblock_isolate_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size, + int *start_rgn, int *end_rgn) +{ + phys_addr_t end = base + memblock_cap_size(base, &size); + int i; + + *start_rgn = *end_rgn = 0; + + if (!size) + return 0; + + /* we'll create at most two more regions */ + while (type->cnt + 2 > type->max) + if (memblock_double_array(type, base, size) < 0) + return -ENOMEM; + + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + phys_addr_t rbase = rgn->base; + phys_addr_t rend = rbase + rgn->size; + + if (rbase >= end) break; + if (rend <= base) + continue; + + if (rbase < base) { + /* + * @rgn intersects from below. Split and continue + * to process the next region - the new top half. + */ + rgn->base = base; + rgn->size -= base - rbase; + type->total_size -= base - rbase; + memblock_insert_region(type, i, rbase, base - rbase, + memblock_get_region_node(rgn), + rgn->flags); + } else if (rend > end) { + /* + * @rgn intersects from above. Split and redo the + * current region - the new bottom half. + */ + rgn->base = end; + rgn->size -= end - rbase; + type->total_size -= end - rbase; + memblock_insert_region(type, i--, rbase, end - rbase, + memblock_get_region_node(rgn), + rgn->flags); + } else { + /* @rgn is fully contained, record it */ + if (!*end_rgn) + *start_rgn = i; + *end_rgn = i + 1; } } - if (base < type->regions[0].base) { - type->regions[0].base = base; - type->regions[0].size = size; - } - type->cnt++; + return 0; +} - /* The array is full ? Try to resize it. If that fails, we undo - * our allocation and return an error - */ - if (type->cnt == type->max && memblock_double_array(type)) { - type->cnt--; - return -1; - } +int __init_memblock memblock_remove_range(struct memblock_type *type, + phys_addr_t base, phys_addr_t size) +{ + int start_rgn, end_rgn; + int i, ret; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; + for (i = end_rgn - 1; i >= start_rgn; i--) + memblock_remove_region(type, i); return 0; } -long __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) +int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) { - return memblock_add_region(&memblock.memory, base, size); + return memblock_remove_range(&memblock.memory, base, size); +} + + +int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) +{ + memblock_dbg(" memblock_free: [%#016llx-%#016llx] %pF\n", + (unsigned long long)base, + (unsigned long long)base + size - 1, + (void *)_RET_IP_); + kmemleak_free_part(__va(base), size); + return memblock_remove_range(&memblock.reserved, base, size); } -static long __init_memblock __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size) +static int __init_memblock memblock_reserve_region(phys_addr_t base, + phys_addr_t size, + int nid, + unsigned long flags) { - phys_addr_t rgnbegin, rgnend; - phys_addr_t end = base + size; - int i; + struct memblock_type *_rgn = &memblock.reserved; - rgnbegin = rgnend = 0; /* supress gcc warnings */ + memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n", + (unsigned long long)base, + (unsigned long long)base + size - 1, + flags, (void *)_RET_IP_); - /* Find the region where (base, size) belongs to */ - for (i=0; i < type->cnt; i++) { - rgnbegin = type->regions[i].base; - rgnend = rgnbegin + type->regions[i].size; + return memblock_add_range(_rgn, base, size, nid, flags); +} - if ((rgnbegin <= base) && (end <= rgnend)) - break; - } +int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +{ + return memblock_reserve_region(base, size, MAX_NUMNODES, 0); +} - /* Didn't find the region */ - if (i == type->cnt) - return -1; +/** + * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG. + * @base: the base phys addr of the region + * @size: the size of the region + * + * This function isolates region [@base, @base + @size), and mark it with flag + * MEMBLOCK_HOTPLUG. + * + * Return 0 on succees, -errno on failure. + */ +int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size) +{ + struct memblock_type *type = &memblock.memory; + int i, ret, start_rgn, end_rgn; - /* Check to see if we are removing entire region */ - if ((rgnbegin == base) && (rgnend == end)) { - memblock_remove_region(type, i); - return 0; - } + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; - /* Check to see if region is matching at the front */ - if (rgnbegin == base) { - type->regions[i].base = end; - type->regions[i].size -= size; - return 0; - } + for (i = start_rgn; i < end_rgn; i++) + memblock_set_region_flags(&type->regions[i], MEMBLOCK_HOTPLUG); - /* Check to see if the region is matching at the end */ - if (rgnend == end) { - type->regions[i].size -= size; - return 0; + memblock_merge_regions(type); + return 0; +} + +/** + * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region. + * @base: the base phys addr of the region + * @size: the size of the region + * + * This function isolates region [@base, @base + @size), and clear flag + * MEMBLOCK_HOTPLUG for the isolated regions. + * + * Return 0 on succees, -errno on failure. + */ +int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size) +{ + struct memblock_type *type = &memblock.memory; + int i, ret, start_rgn, end_rgn; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; + + for (i = start_rgn; i < end_rgn; i++) + memblock_clear_region_flags(&type->regions[i], + MEMBLOCK_HOTPLUG); + + memblock_merge_regions(type); + return 0; +} + +/** + * __next__mem_range - next function for for_each_free_mem_range() etc. + * @idx: pointer to u64 loop variable + * @nid: node selector, %NUMA_NO_NODE for all nodes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Find the first area from *@idx which matches @nid, fill the out + * parameters, and update *@idx for the next iteration. The lower 32bit of + * *@idx contains index into type_a and the upper 32bit indexes the + * areas before each region in type_b. For example, if type_b regions + * look like the following, + * + * 0:[0-16), 1:[32-48), 2:[128-130) + * + * The upper 32bit indexes the following regions. + * + * 0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX) + * + * As both region arrays are sorted, the function advances the two indices + * in lockstep and returns each intersection. + */ +void __init_memblock __next_mem_range(u64 *idx, int nid, + struct memblock_type *type_a, + struct memblock_type *type_b, + phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) +{ + int idx_a = *idx & 0xffffffff; + int idx_b = *idx >> 32; + + if (WARN_ONCE(nid == MAX_NUMNODES, + "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + + for (; idx_a < type_a->cnt; idx_a++) { + struct memblock_region *m = &type_a->regions[idx_a]; + + phys_addr_t m_start = m->base; + phys_addr_t m_end = m->base + m->size; + int m_nid = memblock_get_region_node(m); + + /* only memory regions are associated with nodes, check it */ + if (nid != NUMA_NO_NODE && nid != m_nid) + continue; + + if (!type_b) { + if (out_start) + *out_start = m_start; + if (out_end) + *out_end = m_end; + if (out_nid) + *out_nid = m_nid; + idx_a++; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + + /* scan areas before each reservation */ + for (; idx_b < type_b->cnt + 1; idx_b++) { + struct memblock_region *r; + phys_addr_t r_start; + phys_addr_t r_end; + + r = &type_b->regions[idx_b]; + r_start = idx_b ? r[-1].base + r[-1].size : 0; + r_end = idx_b < type_b->cnt ? + r->base : ULLONG_MAX; + + /* + * if idx_b advanced past idx_a, + * break out to advance idx_a + */ + if (r_start >= m_end) + break; + /* if the two regions intersect, we're done */ + if (m_start < r_end) { + if (out_start) + *out_start = + max(m_start, r_start); + if (out_end) + *out_end = min(m_end, r_end); + if (out_nid) + *out_nid = m_nid; + /* + * The region which ends first is + * advanced for the next iteration. + */ + if (m_end <= r_end) + idx_a++; + else + idx_b++; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + } } - /* - * We need to split the entry - adjust the current one to the - * beginging of the hole and add the region after hole. - */ - type->regions[i].size = base - type->regions[i].base; - return memblock_add_region(type, end, rgnend - end); + /* signal end of iteration */ + *idx = ULLONG_MAX; } -long __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size) +/** + * __next_mem_range_rev - generic next function for for_each_*_range_rev() + * + * Finds the next range from type_a which is not marked as unsuitable + * in type_b. + * + * @idx: pointer to u64 loop variable + * @nid: nid: node selector, %NUMA_NO_NODE for all nodes + * @type_a: pointer to memblock_type from where the range is taken + * @type_b: pointer to memblock_type which excludes memory from being taken + * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL + * @out_nid: ptr to int for nid of the range, can be %NULL + * + * Reverse of __next_mem_range(). + */ +void __init_memblock __next_mem_range_rev(u64 *idx, int nid, + struct memblock_type *type_a, + struct memblock_type *type_b, + phys_addr_t *out_start, + phys_addr_t *out_end, int *out_nid) { - return __memblock_remove(&memblock.memory, base, size); + int idx_a = *idx & 0xffffffff; + int idx_b = *idx >> 32; + + if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + + if (*idx == (u64)ULLONG_MAX) { + idx_a = type_a->cnt - 1; + idx_b = type_b->cnt; + } + + for (; idx_a >= 0; idx_a--) { + struct memblock_region *m = &type_a->regions[idx_a]; + + phys_addr_t m_start = m->base; + phys_addr_t m_end = m->base + m->size; + int m_nid = memblock_get_region_node(m); + + /* only memory regions are associated with nodes, check it */ + if (nid != NUMA_NO_NODE && nid != m_nid) + continue; + + /* skip hotpluggable memory regions if needed */ + if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + continue; + + if (!type_b) { + if (out_start) + *out_start = m_start; + if (out_end) + *out_end = m_end; + if (out_nid) + *out_nid = m_nid; + idx_a++; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + + /* scan areas before each reservation */ + for (; idx_b >= 0; idx_b--) { + struct memblock_region *r; + phys_addr_t r_start; + phys_addr_t r_end; + + r = &type_b->regions[idx_b]; + r_start = idx_b ? r[-1].base + r[-1].size : 0; + r_end = idx_b < type_b->cnt ? + r->base : ULLONG_MAX; + /* + * if idx_b advanced past idx_a, + * break out to advance idx_a + */ + + if (r_end <= m_start) + break; + /* if the two regions intersect, we're done */ + if (m_end > r_start) { + if (out_start) + *out_start = max(m_start, r_start); + if (out_end) + *out_end = min(m_end, r_end); + if (out_nid) + *out_nid = m_nid; + if (m_start >= r_start) + idx_a--; + else + idx_b--; + *idx = (u32)idx_a | (u64)idx_b << 32; + return; + } + } + } + /* signal end of iteration */ + *idx = ULLONG_MAX; } -long __init_memblock memblock_free(phys_addr_t base, phys_addr_t size) +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +/* + * Common iterator interface used to define for_each_mem_range(). + */ +void __init_memblock __next_mem_pfn_range(int *idx, int nid, + unsigned long *out_start_pfn, + unsigned long *out_end_pfn, int *out_nid) { - return __memblock_remove(&memblock.reserved, base, size); + struct memblock_type *type = &memblock.memory; + struct memblock_region *r; + + while (++*idx < type->cnt) { + r = &type->regions[*idx]; + + if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) + continue; + if (nid == MAX_NUMNODES || nid == r->nid) + break; + } + if (*idx >= type->cnt) { + *idx = -1; + return; + } + + if (out_start_pfn) + *out_start_pfn = PFN_UP(r->base); + if (out_end_pfn) + *out_end_pfn = PFN_DOWN(r->base + r->size); + if (out_nid) + *out_nid = r->nid; } -long __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size) +/** + * memblock_set_node - set node ID on memblock regions + * @base: base of area to set node ID for + * @size: size of area to set node ID for + * @type: memblock type to set node ID for + * @nid: node ID to set + * + * Set the nid of memblock @type regions in [@base,@base+@size) to @nid. + * Regions which cross the area boundaries are split as necessary. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, + struct memblock_type *type, int nid) { - struct memblock_type *_rgn = &memblock.reserved; + int start_rgn, end_rgn; + int i, ret; + + ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); + if (ret) + return ret; - BUG_ON(0 == size); + for (i = start_rgn; i < end_rgn; i++) + memblock_set_region_node(&type->regions[i], nid); - return memblock_add_region(_rgn, base, size); + memblock_merge_regions(type); + return 0; } +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) +static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t start, + phys_addr_t end, int nid) { phys_addr_t found; - /* We align the size to limit fragmentation. Without this, a lot of - * small allocs quickly eat up the whole reserve array on sparc - */ - size = memblock_align_up(size, align); + if (!align) + align = SMP_CACHE_BYTES; - found = memblock_find_base(size, align, 0, max_addr); - if (found != MEMBLOCK_ERROR && - memblock_add_region(&memblock.reserved, found, size) >= 0) + found = memblock_find_in_range_node(size, align, start, end, nid); + if (found && !memblock_reserve(found, size)) { + /* + * The min_count is set to 0 so that memblock allocations are + * never reported as leaks. + */ + kmemleak_alloc(__va(found), size, 0, 0); return found; - + } return 0; } +phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, + phys_addr_t start, phys_addr_t end) +{ + return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE); +} + +static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size, + phys_addr_t align, phys_addr_t max_addr, + int nid) +{ + return memblock_alloc_range_nid(size, align, 0, max_addr, nid); +} + +phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid); +} + +phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) +{ + return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE); +} + phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr) { phys_addr_t alloc; @@ -491,116 +1096,247 @@ phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align) return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); } +phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +{ + phys_addr_t res = memblock_alloc_nid(size, align, nid); -/* - * Additional node-local allocators. Search for node memory is bottom up - * and walks memblock regions within that node bottom-up as well, but allocation - * within an memblock region is top-down. XXX I plan to fix that at some stage + if (res) + return res; + return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); +} + +/** + * memblock_virt_alloc_internal - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region to allocate (phys address) + * @max_addr: the upper bound of the memory region to allocate (phys address) + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * The @min_addr limit is dropped if it can not be satisfied and the allocation + * will fall back to memory below @min_addr. Also, allocation may fall back + * to any node in the system if the specified node can not + * hold the requested memory. + * + * The allocation is performed from memory region limited by + * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE. + * + * The memory block is aligned on SMP_CACHE_BYTES if @align == 0. + * + * The phys address of allocated boot memory block is converted to virtual and + * allocated memory is reset to 0. * - * WARNING: Only available after early_node_map[] has been populated, - * on some architectures, that is after all the calls to add_active_range() - * have been done to populate it. + * In addition, function sets the min_count to 0 using kmemleak_alloc for + * allocated boot memory block, so that it is never reported as leaks. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure. */ - -phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid) +static void * __init memblock_virt_alloc_internal( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) { -#ifdef CONFIG_ARCH_POPULATES_NODE_MAP + phys_addr_t alloc; + void *ptr; + + if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) + nid = NUMA_NO_NODE; + /* - * This code originates from sparc which really wants use to walk by addresses - * and returns the nid. This is not very convenient for early_pfn_map[] users - * as the map isn't sorted yet, and it really wants to be walked by nid. - * - * For now, I implement the inefficient method below which walks the early - * map multiple times. Eventually we may want to use an ARCH config option - * to implement a completely different method for both case. + * Detect any accidental use of these APIs after slab is ready, as at + * this moment memblock may be deinitialized already and its + * internal data may be destroyed (after execution of free_all_bootmem) */ - unsigned long start_pfn, end_pfn; - int i; - - for (i = 0; i < MAX_NUMNODES; i++) { - get_pfn_range_for_nid(i, &start_pfn, &end_pfn); - if (start < PFN_PHYS(start_pfn) || start >= PFN_PHYS(end_pfn)) - continue; - *nid = i; - return min(end, PFN_PHYS(end_pfn)); + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, nid); + + if (!align) + align = SMP_CACHE_BYTES; + + if (max_addr > memblock.current_limit) + max_addr = memblock.current_limit; + +again: + alloc = memblock_find_in_range_node(size, align, min_addr, max_addr, + nid); + if (alloc) + goto done; + + if (nid != NUMA_NO_NODE) { + alloc = memblock_find_in_range_node(size, align, min_addr, + max_addr, NUMA_NO_NODE); + if (alloc) + goto done; } -#endif - *nid = 0; - return end; -} + if (min_addr) { + min_addr = 0; + goto again; + } else { + goto error; + } -static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp, - phys_addr_t size, - phys_addr_t align, int nid) -{ - phys_addr_t start, end; +done: + memblock_reserve(alloc, size); + ptr = phys_to_virt(alloc); + memset(ptr, 0, size); - start = mp->base; - end = start + mp->size; + /* + * The min_count is set to 0 so that bootmem allocated blocks + * are never reported as leaks. This is because many of these blocks + * are only referred via the physical address which is not + * looked up by kmemleak. + */ + kmemleak_alloc(ptr, size, 0, 0); - start = memblock_align_up(start, align); - while (start < end) { - phys_addr_t this_end; - int this_nid; + return ptr; - this_end = memblock_nid_range(start, end, &this_nid); - if (this_nid == nid) { - phys_addr_t ret = memblock_find_region(start, this_end, size, align); - if (ret != MEMBLOCK_ERROR && - memblock_add_region(&memblock.reserved, ret, size) >= 0) - return ret; - } - start = this_end; - } - - return MEMBLOCK_ERROR; +error: + return NULL; } -phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid) +/** + * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + * is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to + * allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides + * additional debug information (including caller info), if enabled. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure. + */ +void * __init memblock_virt_alloc_try_nid_nopanic( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) { - struct memblock_type *mem = &memblock.memory; - int i; - - BUG_ON(0 == size); - - /* We align the size to limit fragmentation. Without this, a lot of - * small allocs quickly eat up the whole reserve array on sparc - */ - size = memblock_align_up(size, align); - - /* We do a bottom-up search for a region with the right - * nid since that's easier considering how memblock_nid_range() - * works - */ - for (i = 0; i < mem->cnt; i++) { - phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i], - size, align, nid); - if (ret != MEMBLOCK_ERROR) - return ret; - } - - return 0; + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", + __func__, (u64)size, (u64)align, nid, (u64)min_addr, + (u64)max_addr, (void *)_RET_IP_); + return memblock_virt_alloc_internal(size, align, min_addr, + max_addr, nid); } -phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid) +/** + * memblock_virt_alloc_try_nid - allocate boot memory block with panicking + * @size: size of memory block to be allocated in bytes + * @align: alignment of the region and block's size + * @min_addr: the lower bound of the memory region from where the allocation + * is preferred (phys address) + * @max_addr: the upper bound of the memory region from where the allocation + * is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to + * allocate only from memory limited by memblock.current_limit value + * @nid: nid of the free area to find, %NUMA_NO_NODE for any node + * + * Public panicking version of _memblock_virt_alloc_try_nid_nopanic() + * which provides debug information (including caller info), if enabled, + * and panics if the request can not be satisfied. + * + * RETURNS: + * Virtual address of allocated memory block on success, NULL on failure. + */ +void * __init memblock_virt_alloc_try_nid( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, + int nid) { - phys_addr_t res = memblock_alloc_nid(size, align, nid); + void *ptr; + + memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n", + __func__, (u64)size, (u64)align, nid, (u64)min_addr, + (u64)max_addr, (void *)_RET_IP_); + ptr = memblock_virt_alloc_internal(size, align, + min_addr, max_addr, nid); + if (ptr) + return ptr; + + panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n", + __func__, (u64)size, (u64)align, nid, (u64)min_addr, + (u64)max_addr); + return NULL; +} - if (res) - return res; - return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); +/** + * __memblock_free_early - free boot memory block + * @base: phys starting address of the boot memory block + * @size: size of the boot memory block in bytes + * + * Free boot memory block previously allocated by memblock_virt_alloc_xx() API. + * The freeing memory will not be released to the buddy allocator. + */ +void __init __memblock_free_early(phys_addr_t base, phys_addr_t size) +{ + memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", + __func__, (u64)base, (u64)base + size - 1, + (void *)_RET_IP_); + kmemleak_free_part(__va(base), size); + memblock_remove_range(&memblock.reserved, base, size); } +/* + * __memblock_free_late - free bootmem block pages directly to buddy allocator + * @addr: phys starting address of the boot memory block + * @size: size of the boot memory block in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system. Pages are released directly + * to the buddy allocator, no bootmem metadata is updated because it is gone. + */ +void __init __memblock_free_late(phys_addr_t base, phys_addr_t size) +{ + u64 cursor, end; + + memblock_dbg("%s: [%#016llx-%#016llx] %pF\n", + __func__, (u64)base, (u64)base + size - 1, + (void *)_RET_IP_); + kmemleak_free_part(__va(base), size); + cursor = PFN_UP(base); + end = PFN_DOWN(base + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} /* * Remaining API functions */ -/* You must call memblock_analyze() before this. */ phys_addr_t __init memblock_phys_mem_size(void) { - return memblock.memory_size; + return memblock.memory.total_size; +} + +phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) +{ + unsigned long pages = 0; + struct memblock_region *r; + unsigned long start_pfn, end_pfn; + + for_each_memblock(memory, r) { + start_pfn = memblock_region_memory_base_pfn(r); + end_pfn = memblock_region_memory_end_pfn(r); + start_pfn = min_t(unsigned long, start_pfn, limit_pfn); + end_pfn = min_t(unsigned long, end_pfn, limit_pfn); + pages += end_pfn - start_pfn; + } + + return PFN_PHYS(pages); +} + +/* lowest address */ +phys_addr_t __init_memblock memblock_start_of_DRAM(void) +{ + return memblock.memory.regions[0].base; } phys_addr_t __init_memblock memblock_end_of_DRAM(void) @@ -610,45 +1346,28 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void) return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size); } -/* You must call memblock_analyze() after this. */ -void __init memblock_enforce_memory_limit(phys_addr_t memory_limit) +void __init memblock_enforce_memory_limit(phys_addr_t limit) { - unsigned long i; - phys_addr_t limit; - struct memblock_region *p; + phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX; + struct memblock_region *r; - if (!memory_limit) + if (!limit) return; - /* Truncate the memblock regions to satisfy the memory limit. */ - limit = memory_limit; - for (i = 0; i < memblock.memory.cnt; i++) { - if (limit > memblock.memory.regions[i].size) { - limit -= memblock.memory.regions[i].size; - continue; + /* find out max address */ + for_each_memblock(memory, r) { + if (limit <= r->size) { + max_addr = r->base + limit; + break; } - - memblock.memory.regions[i].size = limit; - memblock.memory.cnt = i + 1; - break; + limit -= r->size; } - memory_limit = memblock_end_of_DRAM(); - - /* And truncate any reserves above the limit also. */ - for (i = 0; i < memblock.reserved.cnt; i++) { - p = &memblock.reserved.regions[i]; - - if (p->base > memory_limit) - p->size = 0; - else if ((p->base + p->size) > memory_limit) - p->size = memory_limit - p->base; - - if (p->size == 0) { - memblock_remove_region(&memblock.reserved, i); - i--; - } - } + /* truncate both memory and reserved regions */ + memblock_remove_range(&memblock.memory, max_addr, + (phys_addr_t)ULLONG_MAX); + memblock_remove_range(&memblock.reserved, max_addr, + (phys_addr_t)ULLONG_MAX); } static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr) @@ -679,108 +1398,137 @@ int __init_memblock memblock_is_memory(phys_addr_t addr) return memblock_search(&memblock.memory, addr) != -1; } +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +int __init_memblock memblock_search_pfn_nid(unsigned long pfn, + unsigned long *start_pfn, unsigned long *end_pfn) +{ + struct memblock_type *type = &memblock.memory; + int mid = memblock_search(type, PFN_PHYS(pfn)); + + if (mid == -1) + return -1; + + *start_pfn = PFN_DOWN(type->regions[mid].base); + *end_pfn = PFN_DOWN(type->regions[mid].base + type->regions[mid].size); + + return type->regions[mid].nid; +} +#endif + +/** + * memblock_is_region_memory - check if a region is a subset of memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) is a subset of a memory block. + * + * RETURNS: + * 0 if false, non-zero if true + */ int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size) { int idx = memblock_search(&memblock.memory, base); + phys_addr_t end = base + memblock_cap_size(base, &size); if (idx == -1) return 0; return memblock.memory.regions[idx].base <= base && (memblock.memory.regions[idx].base + - memblock.memory.regions[idx].size) >= (base + size); + memblock.memory.regions[idx].size) >= end; } +/** + * memblock_is_region_reserved - check if a region intersects reserved memory + * @base: base of region to check + * @size: size of region to check + * + * Check if the region [@base, @base+@size) intersects a reserved memory block. + * + * RETURNS: + * 0 if false, non-zero if true + */ int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { + memblock_cap_size(base, &size); return memblock_overlaps_region(&memblock.reserved, base, size) >= 0; } +void __init_memblock memblock_trim_memory(phys_addr_t align) +{ + phys_addr_t start, end, orig_start, orig_end; + struct memblock_region *r; + + for_each_memblock(memory, r) { + orig_start = r->base; + orig_end = r->base + r->size; + start = round_up(orig_start, align); + end = round_down(orig_end, align); + + if (start == orig_start && end == orig_end) + continue; + + if (start < end) { + r->base = start; + r->size = end - start; + } else { + memblock_remove_region(&memblock.memory, + r - memblock.memory.regions); + r--; + } + } +} void __init_memblock memblock_set_current_limit(phys_addr_t limit) { memblock.current_limit = limit; } -static void __init_memblock memblock_dump(struct memblock_type *region, char *name) +phys_addr_t __init_memblock memblock_get_current_limit(void) +{ + return memblock.current_limit; +} + +static void __init_memblock memblock_dump(struct memblock_type *type, char *name) { unsigned long long base, size; + unsigned long flags; int i; - pr_info(" %s.cnt = 0x%lx\n", name, region->cnt); - - for (i = 0; i < region->cnt; i++) { - base = region->regions[i].base; - size = region->regions[i].size; + pr_info(" %s.cnt = 0x%lx\n", name, type->cnt); - pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes\n", - name, i, base, base + size - 1, size); + for (i = 0; i < type->cnt; i++) { + struct memblock_region *rgn = &type->regions[i]; + char nid_buf[32] = ""; + + base = rgn->base; + size = rgn->size; + flags = rgn->flags; +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP + if (memblock_get_region_node(rgn) != MAX_NUMNODES) + snprintf(nid_buf, sizeof(nid_buf), " on node %d", + memblock_get_region_node(rgn)); +#endif + pr_info(" %s[%#x]\t[%#016llx-%#016llx], %#llx bytes%s flags: %#lx\n", + name, i, base, base + size - 1, size, nid_buf, flags); } } -void __init_memblock memblock_dump_all(void) +void __init_memblock __memblock_dump_all(void) { - if (!memblock_debug) - return; - pr_info("MEMBLOCK configuration:\n"); - pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size); + pr_info(" memory size = %#llx reserved size = %#llx\n", + (unsigned long long)memblock.memory.total_size, + (unsigned long long)memblock.reserved.total_size); memblock_dump(&memblock.memory, "memory"); memblock_dump(&memblock.reserved, "reserved"); } -void __init memblock_analyze(void) +void __init memblock_allow_resize(void) { - int i; - - /* Check marker in the unused last array entry */ - WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base - != (phys_addr_t)RED_INACTIVE); - WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base - != (phys_addr_t)RED_INACTIVE); - - memblock.memory_size = 0; - - for (i = 0; i < memblock.memory.cnt; i++) - memblock.memory_size += memblock.memory.regions[i].size; - - /* We allow resizing from there */ memblock_can_resize = 1; } -void __init memblock_init(void) -{ - static int init_done __initdata = 0; - - if (init_done) - return; - init_done = 1; - - /* Hookup the initial arrays */ - memblock.memory.regions = memblock_memory_init_regions; - memblock.memory.max = INIT_MEMBLOCK_REGIONS; - memblock.reserved.regions = memblock_reserved_init_regions; - memblock.reserved.max = INIT_MEMBLOCK_REGIONS; - - /* Write a marker in the unused last array entry */ - memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; - memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE; - - /* Create a dummy zero size MEMBLOCK which will get coalesced away later. - * This simplifies the memblock_add() code below... - */ - memblock.memory.regions[0].base = 0; - memblock.memory.regions[0].size = 0; - memblock.memory.cnt = 1; - - /* Ditto. */ - memblock.reserved.regions[0].base = 0; - memblock.reserved.regions[0].size = 0; - memblock.reserved.cnt = 1; - - memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE; -} - static int __init early_memblock(char *p) { if (p && strstr(p, "debug")) @@ -789,7 +1537,7 @@ static int __init early_memblock(char *p) } early_param("memblock", early_memblock); -#if defined(CONFIG_DEBUG_FS) && !defined(ARCH_DISCARD_MEMBLOCK) +#if defined(CONFIG_DEBUG_FS) && !defined(CONFIG_ARCH_DISCARD_MEMBLOCK) static int memblock_debug_show(struct seq_file *m, void *private) { @@ -832,6 +1580,9 @@ static int __init memblock_init_debugfs(void) return -ENXIO; debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops); debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops); +#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP + debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops); +#endif return 0; } |
