Diffstat (limited to 'mm/sparse.c')
-rw-r--r--   mm/sparse.c   388
1 file changed, 199 insertions, 189 deletions
diff --git a/mm/sparse.c b/mm/sparse.c
index 61d7cde2311..d1b48b691ac 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -5,10 +5,12 @@
 #include <linux/slab.h>
 #include <linux/mmzone.h>
 #include <linux/bootmem.h>
+#include <linux/compiler.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
+
 #include "internal.h"
 #include <asm/dma.h>
 #include <asm/pgalloc.h>
@@ -65,24 +67,20 @@ static struct mem_section noinline __init_refok *sparse_index_alloc(int nid)
 
         if (slab_is_available()) {
                 if (node_state(nid, N_HIGH_MEMORY))
-                        section = kmalloc_node(array_size, GFP_KERNEL, nid);
+                        section = kzalloc_node(array_size, GFP_KERNEL, nid);
                 else
-                        section = kmalloc(array_size, GFP_KERNEL);
-        } else
-                section = alloc_bootmem_node(NODE_DATA(nid), array_size);
-
-        if (section)
-                memset(section, 0, array_size);
+                        section = kzalloc(array_size, GFP_KERNEL);
+        } else {
+                section = memblock_virt_alloc_node(array_size, nid);
+        }
 
         return section;
 }
 
 static int __meminit sparse_index_init(unsigned long section_nr, int nid)
 {
-        static DEFINE_SPINLOCK(index_init_lock);
         unsigned long root = SECTION_NR_TO_ROOT(section_nr);
         struct mem_section *section;
-        int ret = 0;
 
         if (mem_section[root])
                 return -EEXIST;
@@ -90,21 +88,10 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
         section = sparse_index_alloc(nid);
         if (!section)
                 return -ENOMEM;
-        /*
-         * This lock keeps two different sections from
-         * reallocating for the same index
-         */
-        spin_lock(&index_init_lock);
-
-        if (mem_section[root]) {
-                ret = -EEXIST;
-                goto out;
-        }
 
         mem_section[root] = section;
-out:
-        spin_unlock(&index_init_lock);
-        return ret;
+
+        return 0;
 }
 #else /* !SPARSEMEM_EXTREME */
 static inline int sparse_index_init(unsigned long section_nr, int nid)
@@ -132,6 +119,8 @@ int __section_nr(struct mem_section* ms)
                         break;
         }
 
+        VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+
         return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 }
 
@@ -273,22 +262,33 @@ static unsigned long *__kmalloc_section_usemap(void)
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
-                                         unsigned long count)
+                                         unsigned long size)
 {
-        unsigned long section_nr;
-
+        unsigned long goal, limit;
+        unsigned long *p;
+        int nid;
         /*
          * A page may contain usemaps for other sections preventing the
          * page being freed and making a section unremovable while
-         * other sections referencing the usemap retmain active. Similarly,
+         * other sections referencing the usemap remain active. Similarly,
          * a pgdat can prevent a section being removed. If section A
          * contains a pgdat and section B contains the usemap, both
          * sections become inter-dependent. This allocates usemaps
         * from the same section as the pgdat where possible to avoid
         * this problem.
         */
-        section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
-        return alloc_bootmem_section(usemap_size() * count, section_nr);
+        goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+        limit = goal + (1UL << PA_SECTION_SHIFT);
+        nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+again:
+        p = memblock_virt_alloc_try_nid_nopanic(size,
+                                                SMP_CACHE_BYTES, goal, limit,
+                                                nid);
+        if (!p && limit) {
+                limit = 0;
+                goto again;
+        }
+        return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -332,9 +332,9 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 #else
 static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
-                                         unsigned long count)
+                                         unsigned long size)
 {
-        return NULL;
+        return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -342,40 +342,30 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
+static void __init sparse_early_usemaps_alloc_node(void *data,
                                  unsigned long pnum_begin,
                                  unsigned long pnum_end,
                                  unsigned long usemap_count, int nodeid)
 {
         void *usemap;
         unsigned long pnum;
+        unsigned long **usemap_map = (unsigned long **)data;
         int size = usemap_size();
 
         usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
-                                                          usemap_count);
-        if (usemap) {
-                for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-                        if (!present_section_nr(pnum))
-                                continue;
-                        usemap_map[pnum] = usemap;
-                        usemap += size;
-                }
+                                                          size * usemap_count);
+        if (!usemap) {
+                printk(KERN_WARNING "%s: allocation failed\n", __func__);
                 return;
         }
 
-        usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
-        if (usemap) {
-                for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
-                        if (!present_section_nr(pnum))
-                                continue;
-                        usemap_map[pnum] = usemap;
-                        usemap += size;
-                        check_usemap_section_nr(nodeid, usemap_map[pnum]);
-                }
-                return;
+        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
+                if (!present_section_nr(pnum))
+                        continue;
+                usemap_map[pnum] = usemap;
+                usemap += size;
+                check_usemap_section_nr(nodeid, usemap_map[pnum]);
         }
-
-        printk(KERN_WARNING "%s: allocation failed\n", __func__);
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
@@ -389,8 +379,9 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
                 return map;
 
         size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
-        map = __alloc_bootmem_node_high(NODE_DATA(nid), size,
-                                         PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+        map = memblock_virt_alloc_try_nid(size,
+                                          PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+                                          BOOTMEM_ALLOC_ACCESSIBLE, nid);
         return map;
 }
 void __init sparse_mem_maps_populate_node(struct page **map_map,
@@ -414,8 +405,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
         }
 
         size = PAGE_ALIGN(size);
-        map = __alloc_bootmem_node_high(NODE_DATA(nodeid), size * map_count,
-                                         PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+        map = memblock_virt_alloc_try_nid(size * map_count,
+                                          PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
+                                          BOOTMEM_ALLOC_ACCESSIBLE, nodeid);
         if (map) {
                 for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
                         if (!present_section_nr(pnum))
@@ -444,11 +436,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+static void __init sparse_early_mem_maps_alloc_node(void *data,
                                  unsigned long pnum_begin,
                                  unsigned long pnum_end,
                                  unsigned long map_count, int nodeid)
 {
+        struct page **map_map = (struct page **)data;
         sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
                                          map_count, nodeid);
 }
@@ -470,45 +463,22 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
 }
 #endif
 
-void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
+void __weak __meminit vmemmap_populate_print_last(void)
 {
 }
 
-/*
- * Allocate the accumulated non-linear sections, allocate a mem_map
- * for each and record the physical to section mapping.
+/**
+ *  alloc_usemap_and_memmap - memory alloction for pageblock flags and vmemmap
+ *  @map: usemap_map for pageblock flags or mmap_map for vmemmap
  */
-void __init sparse_init(void)
+static void __init alloc_usemap_and_memmap(void (*alloc_func)
                                        (void *, unsigned long, unsigned long,
                                        unsigned long, int), void *data)
 {
         unsigned long pnum;
-        struct page *map;
-        unsigned long *usemap;
-        unsigned long **usemap_map;
-        int size;
+        unsigned long map_count;
         int nodeid_begin = 0;
         unsigned long pnum_begin = 0;
-        unsigned long usemap_count;
-#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-        unsigned long map_count;
-        int size2;
-        struct page **map_map;
-#endif
-
-        /*
-         * map is using big page (aka 2M in x86 64 bit)
-         * usemap is less one page (aka 24 bytes)
-         * so alloc 2M (with 2M align) and 24 bytes in turn will
-         * make next 2M slip to one more 2M later.
-         * then in big system, the memory will have a lot of holes...
-         * here try to allocate 2M pages continuously.
-         *
-         * powerpc need to call sparse_init_one_section right after each
-         * sparse_early_mem_map_alloc, so allocate usemap_map at first.
-         */
-        size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
-        usemap_map = alloc_bootmem(size);
-        if (!usemap_map)
-                panic("can not allocate usemap_map\n");
 
         for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                 struct mem_section *ms;
@@ -520,7 +490,7 @@ void __init sparse_init(void)
                 pnum_begin = pnum;
                 break;
         }
-        usemap_count = 1;
+        map_count = 1;
         for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
                 struct mem_section *ms;
                 int nodeid;
@@ -530,61 +500,69 @@ void __init sparse_init(void)
                 ms = __nr_to_section(pnum);
                 nodeid = sparse_early_nid(ms);
                 if (nodeid == nodeid_begin) {
-                        usemap_count++;
+                        map_count++;
                         continue;
                 }
                 /* ok, we need to take cake of from pnum_begin to pnum - 1*/
-                sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
-                                                 usemap_count, nodeid_begin);
+                alloc_func(data, pnum_begin, pnum,
+                                                map_count, nodeid_begin);
                 /* new start, update count etc*/
                 nodeid_begin = nodeid;
                 pnum_begin = pnum;
-                usemap_count = 1;
+                map_count = 1;
         }
         /* ok, last chunk */
-        sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
-                                         usemap_count, nodeid_begin);
+        alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
+                                                map_count, nodeid_begin);
+}
 
+/*
+ * Allocate the accumulated non-linear sections, allocate a mem_map
+ * for each and record the physical to section mapping.
+ */
+void __init sparse_init(void)
+{
+        unsigned long pnum;
+        struct page *map;
+        unsigned long *usemap;
+        unsigned long **usemap_map;
+        int size;
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-        size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
-        map_map = alloc_bootmem(size2);
-        if (!map_map)
-                panic("can not allocate map_map\n");
+        int size2;
+        struct page **map_map;
+#endif
 
-        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-                struct mem_section *ms;
+        /* see include/linux/mmzone.h 'struct mem_section' definition */
+        BUILD_BUG_ON(!is_power_of_2(sizeof(struct mem_section)));
 
-                if (!present_section_nr(pnum))
-                        continue;
-                ms = __nr_to_section(pnum);
-                nodeid_begin = sparse_early_nid(ms);
-                pnum_begin = pnum;
-                break;
-        }
-        map_count = 1;
-        for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
-                struct mem_section *ms;
-                int nodeid;
+        /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
+        set_pageblock_order();
 
-                if (!present_section_nr(pnum))
-                        continue;
-                ms = __nr_to_section(pnum);
-                nodeid = sparse_early_nid(ms);
-                if (nodeid == nodeid_begin) {
-                        map_count++;
-                        continue;
-                }
-                /* ok, we need to take cake of from pnum_begin to pnum - 1*/
-                sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
-                                                 map_count, nodeid_begin);
-                /* new start, update count etc*/
-                nodeid_begin = nodeid;
-                pnum_begin = pnum;
-                map_count = 1;
-        }
-        /* ok, last chunk */
-        sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
-                                         map_count, nodeid_begin);
+        /*
+         * map is using big page (aka 2M in x86 64 bit)
+         * usemap is less one page (aka 24 bytes)
+         * so alloc 2M (with 2M align) and 24 bytes in turn will
+         * make next 2M slip to one more 2M later.
+         * then in big system, the memory will have a lot of holes...
+         * here try to allocate 2M pages continuously.
+         *
+         * powerpc need to call sparse_init_one_section right after each
+         * sparse_early_mem_map_alloc, so allocate usemap_map at first.
+         */
+        size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
+        usemap_map = memblock_virt_alloc(size, 0);
+        if (!usemap_map)
+                panic("can not allocate usemap_map\n");
+        alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
+                                                        (void *)usemap_map);
+
+#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
+        size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
+        map_map = memblock_virt_alloc(size2, 0);
+        if (!map_map)
+                panic("can not allocate map_map\n");
+        alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
                                                        (void *)map_map);
 #endif
 
         for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
@@ -610,31 +588,39 @@ void __init sparse_init(void)
         vmemmap_populate_print_last();
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-        free_bootmem(__pa(map_map), size2);
+        memblock_free_early(__pa(map_map), size2);
 #endif
-        free_bootmem(__pa(usemap_map), size);
+        memblock_free_early(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-                                                  unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
         /* This will make the necessary allocations eventually. */
         return sparse_mem_map_populate(pnum, nid);
 }
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
-        return; /* XXX: Not implemented yet */
+        unsigned long start = (unsigned long)memmap;
+        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+
+        vmemmap_free(start, end);
 }
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void free_map_bootmem(struct page *memmap)
 {
+        unsigned long start = (unsigned long)memmap;
+        unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
+
+        vmemmap_free(start, end);
 }
+#endif /* CONFIG_MEMORY_HOTREMOVE */
 #else
-static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+static struct page *__kmalloc_section_memmap(void)
 {
         struct page *page, *ret;
-        unsigned long memmap_size = sizeof(struct page) * nr_pages;
+        unsigned long memmap_size = sizeof(struct page) * PAGES_PER_SECTION;
 
         page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
         if (page)
@@ -648,30 +634,33 @@ static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 got_map_page:
         ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
 got_map_ptr:
-        memset(ret, 0, memmap_size);
 
         return ret;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
-                                                  unsigned long nr_pages)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
 {
-        return __kmalloc_section_memmap(nr_pages);
+        return __kmalloc_section_memmap();
 }
 
-static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
+static void __kfree_section_memmap(struct page *memmap)
 {
         if (is_vmalloc_addr(memmap))
                 vfree(memmap);
         else
                 free_pages((unsigned long)memmap,
-                           get_order(sizeof(struct page) * nr_pages));
+                           get_order(sizeof(struct page) * PAGES_PER_SECTION));
 }
 
-static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void free_map_bootmem(struct page *memmap)
 {
         unsigned long maps_section_nr, removing_section_nr, i;
-        unsigned long magic;
+        unsigned long magic, nr_pages;
+        struct page *page = virt_to_page(memmap);
+
+        nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
+                >> PAGE_SHIFT;
 
         for (i = 0; i < nr_pages; i++, page++) {
                 magic = (unsigned long) page->lru.next;
@@ -693,50 +682,15 @@ static void free_map_bootmem(struct page *page, unsigned long nr_pages)
                         put_page_bootmem(page);
         }
 }
+#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
-{
-        struct page *usemap_page;
-        unsigned long nr_pages;
-
-        if (!usemap)
-                return;
-
-        usemap_page = virt_to_page(usemap);
-        /*
-         * Check to see if allocation came from hot-plug-add
-         */
-        if (PageSlab(usemap_page)) {
-                kfree(usemap);
-                if (memmap)
-                        __kfree_section_memmap(memmap, PAGES_PER_SECTION);
-                return;
-        }
-
-        /*
-         * The usemap came from bootmem. This is packed with other usemaps
-         * on the section which has pgdat at boot time. Just keep it as is now.
-         */
-
-        if (memmap) {
-                struct page *memmap_page;
-                memmap_page = virt_to_page(memmap);
-
-                nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
-                        >> PAGE_SHIFT;
-
-                free_map_bootmem(memmap_page, nr_pages);
-        }
-}
-
 /*
  * returns the number of sections whose mem_maps were properly
  * set. If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
-                                     int nr_pages)
+int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn)
 {
         unsigned long section_nr = pfn_to_section_nr(start_pfn);
         struct pglist_data *pgdat = zone->zone_pgdat;
@@ -753,12 +707,12 @@ int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
         ret = sparse_index_init(section_nr, pgdat->node_id);
         if (ret < 0 && ret != -EEXIST)
                 return ret;
-        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+        memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
         if (!memmap)
                 return -ENOMEM;
         usemap = __kmalloc_section_usemap();
         if (!usemap) {
-                __kfree_section_memmap(memmap, nr_pages);
+                __kfree_section_memmap(memmap);
                 return -ENOMEM;
         }
 
@@ -770,6 +724,8 @@
                 goto out;
         }
 
+        memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
+
         ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
         ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
@@ -778,16 +734,67 @@ out:
         pgdat_resize_unlock(pgdat, &flags);
         if (ret <= 0) {
                 kfree(usemap);
-                __kfree_section_memmap(memmap, nr_pages);
+                __kfree_section_memmap(memmap);
         }
         return ret;
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#ifdef CONFIG_MEMORY_FAILURE
+static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+{
+        int i;
+
+        if (!memmap)
+                return;
+
+        for (i = 0; i < PAGES_PER_SECTION; i++) {
+                if (PageHWPoison(&memmap[i])) {
+                        atomic_long_sub(1, &num_poisoned_pages);
+                        ClearPageHWPoison(&memmap[i]);
+                }
+        }
+}
+#else
+static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
+{
+}
+#endif
+
+static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+{
+        struct page *usemap_page;
+
+        if (!usemap)
+                return;
+
+        usemap_page = virt_to_page(usemap);
+        /*
+         * Check to see if allocation came from hot-plug-add
+         */
+        if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
+                kfree(usemap);
+                if (memmap)
+                        __kfree_section_memmap(memmap);
+                return;
+        }
+
+        /*
+         * The usemap came from bootmem. This is packed with other usemaps
+         * on the section which has pgdat at boot time. Just keep it as is now.
+         */
+
+        if (memmap)
+                free_map_bootmem(memmap);
+}
+
 void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
 {
         struct page *memmap = NULL;
-        unsigned long *usemap = NULL;
+        unsigned long *usemap = NULL, flags;
+        struct pglist_data *pgdat = zone->zone_pgdat;
 
+        pgdat_resize_lock(pgdat, &flags);
         if (ms->section_mem_map) {
                 usemap = ms->pageblock_flags;
                 memmap = sparse_decode_mem_map(ms->section_mem_map,
@@ -795,7 +802,10 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms)
                 ms->section_mem_map = 0;
                 ms->pageblock_flags = NULL;
         }
+        pgdat_resize_unlock(pgdat, &flags);
 
+        clear_hwpoisoned_pages(memmap, PAGES_PER_SECTION);
         free_section_usemap(memmap, usemap);
 }
-#endif
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
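Note (not part of the patch): the central refactoring above is the new alloc_usemap_and_memmap() helper, which walks the present memory sections once, groups runs of sections that belong to the same node, and hands each per-node run to a caller-supplied callback (sparse_early_usemaps_alloc_node or sparse_early_mem_maps_alloc_node). The standalone C sketch below mirrors only that grouping-and-dispatch control flow with made-up data so it can be compiled and run outside the kernel; the section table, present_section(), for_each_node_range() and print_range() names are illustrative stand-ins, not kernel APIs.

#include <stdio.h>

/* Illustrative stand-ins for the kernel's section bookkeeping. */
#define NR_SECTIONS 16

/* node id per section; -1 means the section is not present. */
static const int section_nid[NR_SECTIONS] = {
        0, 0, 0, -1, -1, 1, 1, 1, 1, -1, 2, 2, -1, -1, 0, 0
};

static int present_section(unsigned long pnum)
{
        return section_nid[pnum] >= 0;
}

/*
 * Mirrors alloc_usemap_and_memmap(): find the first present section,
 * extend the current run while the node id stays the same, and flush
 * each accumulated range to the callback, plus a final "last chunk" call.
 */
static void for_each_node_range(void (*alloc_func)(void *, unsigned long,
                                                   unsigned long,
                                                   unsigned long, int),
                                void *data)
{
        unsigned long pnum, pnum_begin = 0, map_count;
        int nodeid_begin = 0;

        for (pnum = 0; pnum < NR_SECTIONS; pnum++) {
                if (!present_section(pnum))
                        continue;
                nodeid_begin = section_nid[pnum];
                pnum_begin = pnum;
                break;
        }
        map_count = 1;
        for (pnum = pnum_begin + 1; pnum < NR_SECTIONS; pnum++) {
                int nodeid;

                if (!present_section(pnum))
                        continue;
                nodeid = section_nid[pnum];
                if (nodeid == nodeid_begin) {
                        map_count++;
                        continue;
                }
                /* flush the range [pnum_begin, pnum) for the previous node */
                alloc_func(data, pnum_begin, pnum, map_count, nodeid_begin);
                nodeid_begin = nodeid;
                pnum_begin = pnum;
                map_count = 1;
        }
        /* last chunk */
        alloc_func(data, pnum_begin, NR_SECTIONS, map_count, nodeid_begin);
}

/* Example callback: report what the kernel would allocate per node range. */
static void print_range(void *data, unsigned long begin, unsigned long end,
                        unsigned long count, int nid)
{
        (void)data;
        printf("node %d: sections [%lu, %lu), %lu present\n",
               nid, begin, end, count);
}

int main(void)
{
        for_each_node_range(print_range, NULL);
        return 0;
}

In the patch itself the same walk is performed twice, once with the usemap callback and once (under CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER) with the mem_map callback, which is why the duplicated per-node loops could be deleted from sparse_init().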
